nir: Add options to nir_lower_compute_system_values to control compute ID base lowering

[mesa.git] / src / intel / vulkan / genX_gpu_memcpy.c
diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c

index a8e99d1a69528f061147f6f7c09f45ee6a1e6780..57dc55ec39415902037d389e710b60c3616934a5 100644 (file)
--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -51,85 +51,17 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
     return 1 << MIN2(a_log2, b_log2);
  }
  
-void
-genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
-                           struct anv_bo *dst, uint32_t dst_offset,
-                           struct anv_bo *src, uint32_t src_offset,
-                           uint32_t size)
-{
-   /* This memcpy operates in units of dwords. */
-   assert(size % 4 == 0);
-   assert(dst_offset % 4 == 0);
-   assert(src_offset % 4 == 0);
-
-#if GEN_GEN == 7
-   /* On gen7, the combination of commands used here(MI_LOAD_REGISTER_MEM
-    * and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
-    * in-flight when they are issued even if the memory touched is not
-    * currently active for rendering.  The weird bit is that it is not the
-    * MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
-    * rendering hangs such that the next stalling command after the
-    * MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
-    *
-    * It is unclear exactly why this hang occurs.  Both MI commands come with
-    * warnings about the 3D pipeline but that doesn't seem to fully explain
-    * it.  My (Jason's) best theory is that it has something to do with the
-    * fact that we're using a GPU state register as our temporary and that
-    * something with reading/writing it is causing problems.
-    *
-    * In order to work around this issue, we emit a PIPE_CONTROL with the
-    * command streamer stall bit set.
-    */
-   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
-   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
-#endif
-
-   for (uint32_t i = 0; i < size; i += 4) {
-      const struct anv_address src_addr =
-         (struct anv_address) { src, src_offset + i};
-      const struct anv_address dst_addr =
-         (struct anv_address) { dst, dst_offset + i};
-#if GEN_GEN >= 8
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) {
-         cp.DestinationMemoryAddress = dst_addr;
-         cp.SourceMemoryAddress = src_addr;
-      }
-#else
-      /* IVB does not have a general purpose register for command streamer
-       * commands. Therefore, we use an alternate temporary register.
-       */
-#define TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) {
-         load.RegisterAddress = TEMP_REG;
-         load.MemoryAddress = src_addr;
-      }
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) {
-         store.RegisterAddress = TEMP_REG;
-         store.MemoryAddress = dst_addr;
-      }
-#undef TEMP_REG
-#endif
-   }
-   return;
-}
-
  void
  genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
-                           struct anv_bo *dst, uint32_t dst_offset,
-                           struct anv_bo *src, uint32_t src_offset,
+                           struct anv_address dst, struct anv_address src,
                             uint32_t size)
  {
     if (size == 0)
        return;
  
-   assert(dst_offset + size <= dst->size);
-   assert(src_offset + size <= src->size);
-
     /* The maximum copy block size is 4 32-bit components at a time. */
-   unsigned bs = 16;
-   bs = gcd_pow2_u64(bs, src_offset);
-   bs = gcd_pow2_u64(bs, dst_offset);
-   bs = gcd_pow2_u64(bs, size);
+   assert(size % 4 == 0);
+   unsigned bs = gcd_pow2_u64(16, size);
  
     enum isl_format format;
     switch (bs) {
@@ -146,6 +78,7 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
        genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
     }
  
+   genX(cmd_buffer_set_binding_for_gen8_vb_flush)(cmd_buffer, 32, src, size);
     genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
  
     genX(flush_pipeline_select_3d)(cmd_buffer);
@@ -156,14 +89,13 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
        &(struct GENX(VERTEX_BUFFER_STATE)) {
           .VertexBufferIndex = 32, /* Reserved for this */
           .AddressModifyEnable = true,
-         .BufferStartingAddress = { src, src_offset },
+         .BufferStartingAddress = src,
           .BufferPitch = bs,
+         .MOCS = anv_mocs_for_bo(cmd_buffer->device, src.bo),
  #if (GEN_GEN >= 8)
-         .MemoryObjectControlState = GENX(MOCS),
           .BufferSize = size,
  #else
-         .VertexBufferMemoryObjectControlState = GENX(MOCS),
-         .EndAddress = { src, src_offset + size - 1 },
+         .EndAddress = anv_address_add(src, size - 1),
  #endif
        });
  
@@ -180,6 +112,13 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
           .Component3Control = (bs >= 16) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0,
        });
  
+#if GEN_GEN >= 8
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
+      vfi.InstancingEnable = false;
+      vfi.VertexElementIndex = 0;
+   }
+#endif
+
  #if GEN_GEN >= 8
     anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS), sgvs);
  #endif
@@ -215,20 +154,24 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
  
     genX(emit_urb_setup)(cmd_buffer->device, &cmd_buffer->batch,
                          cmd_buffer->state.current_l3_config,
-                        VK_SHADER_STAGE_VERTEX_BIT, entry_size);
+                        VK_SHADER_STAGE_VERTEX_BIT, entry_size, NULL);
  
     anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
+#if GEN_GEN < 12
        sob.SOBufferIndex = 0;
-      sob.SOBufferObjectControlState = GENX(MOCS);
-      sob.SurfaceBaseAddress = (struct anv_address) { dst, dst_offset };
+#else
+      sob._3DCommandOpcode = 0;
+      sob._3DCommandSubOpcode = SO_BUFFER_INDEX_0_CMD;
+#endif
+      sob.MOCS = anv_mocs_for_bo(cmd_buffer->device, dst.bo);
+      sob.SurfaceBaseAddress = dst;
  
  #if GEN_GEN >= 8
        sob.SOBufferEnable = true;
        sob.SurfaceSize = size / 4 - 1;
  #else
        sob.SurfacePitch = bs;
-      sob.SurfaceEndAddress = sob.SurfaceBaseAddress;
-      sob.SurfaceEndAddress.offset += size;
+      sob.SurfaceEndAddress = anv_address_add(dst, size);
  #endif
  
  #if GEN_GEN >= 8
@@ -284,6 +227,11 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
        vf.StatisticsEnable = false;
     }
  
+#if GEN_GEN >= 12
+   /* Disable Primitive Replication. */
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
+#endif
+
     anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
        prim.VertexAccessType         = SEQUENTIAL;
        prim.PrimitiveTopologyType    = _3DPRIM_POINTLIST;
@@ -294,5 +242,8 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
        prim.BaseVertexLocation       = 0;
     }
  
+   genX(cmd_buffer_update_dirty_vbs_for_gen8_vb_flush)(cmd_buffer, SEQUENTIAL,
+                                                       1ull << 32);
+
     cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
  }