lima/ppir: add ppir_node to ppir_src

[mesa.git] / src / gallium / drivers / iris / iris_bufmgr.h
diff --git a/src/gallium/drivers/iris/iris_bufmgr.h b/src/gallium/drivers/iris/iris_bufmgr.h

index 13b877669a0beb2bc1af69166b4320270fd1ad60..f05a71edf982b008e62eb66b604151572a2f8bc1 100644 (file)
--- a/src/gallium/drivers/iris/iris_bufmgr.h
+++ b/src/gallium/drivers/iris/iris_bufmgr.h
@@ -31,10 +31,63 @@
  #include "util/macros.h"
  #include "util/u_atomic.h"
  #include "util/list.h"
+#include "pipe/p_defines.h"
  
  struct gen_device_info;
  struct pipe_debug_callback;
  
+/**
+ * Memory zones.  When allocating a buffer, you can request that it is
+ * placed into a specific region of the virtual address space (PPGTT).
+ *
+ * Most buffers can go anywhere (IRIS_MEMZONE_OTHER).  Some buffers are
+ * accessed via an offset from a base address.  STATE_BASE_ADDRESS has
+ * a maximum 4GB size for each region, so we need to restrict those
+ * buffers to be within 4GB of the base.  Each memory zone corresponds
+ * to a particular base address.
+ *
+ * We lay out the virtual address space as follows:
+ *
+ * - [0,   4K): Nothing            (empty page for null address)
+ * - [4K,  4G): Shaders            (Instruction Base Address)
+ * - [4G,  8G): Surfaces & Binders (Surface State Base Address, Bindless ...)
+ * - [8G, 12G): Dynamic            (Dynamic State Base Address)
+ * - [12G, *):  Other              (everything else in the full 48-bit VMA)
+ *
+ * A special buffer for border color lives at the start of the dynamic state
+ * memory zone.  This unfortunately has to be handled specially because the
+ * SAMPLER_STATE "Indirect State Pointer" field is only a 24-bit pointer.
+ *
+ * Each GL context uses a separate GEM context, which technically gives them
+ * each a separate VMA.  However, we assign addresses globally, so buffers will
+ * have the same address in all GEM contexts.  This lets us have a single BO
+ * field for the address, which is easy and cheap.
+ */
+enum iris_memory_zone {
+   IRIS_MEMZONE_SHADER,   /* begins at IRIS_MEMZONE_SHADER_START */
+   IRIS_MEMZONE_BINDER,   /* begins at IRIS_MEMZONE_BINDER_START */
+   IRIS_MEMZONE_SURFACE,  /* begins at IRIS_MEMZONE_SURFACE_START, after the binder area */
+   IRIS_MEMZONE_DYNAMIC,  /* begins at IRIS_MEMZONE_DYNAMIC_START */
+   IRIS_MEMZONE_OTHER,    /* begins at IRIS_MEMZONE_OTHER_START; last zone in IRIS_MEMZONE_COUNT */
+
+   IRIS_MEMZONE_BORDER_COLOR_POOL, /* single-buffer "zone"; excluded from IRIS_MEMZONE_COUNT */
+};
+
+/* Intentionally exclude single buffer "zones" */
+#define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 1)
+
+#define IRIS_BINDER_SIZE (64 * 1024)
+#define IRIS_MAX_BINDERS 100
+
+#define IRIS_MEMZONE_SHADER_START     (0ull * (1ull << 32))
+#define IRIS_MEMZONE_BINDER_START     (1ull * (1ull << 32))
+#define IRIS_MEMZONE_SURFACE_START    (IRIS_MEMZONE_BINDER_START + IRIS_MAX_BINDERS * IRIS_BINDER_SIZE)
+#define IRIS_MEMZONE_DYNAMIC_START    (2ull * (1ull << 32))
+#define IRIS_MEMZONE_OTHER_START      (3ull * (1ull << 32))
+
+#define IRIS_BORDER_COLOR_POOL_ADDRESS IRIS_MEMZONE_DYNAMIC_START
+#define IRIS_BORDER_COLOR_POOL_SIZE (64 * 1024)
+
  struct iris_bo {
     /**
      * Size in bytes of the buffer object.
@@ -44,13 +97,6 @@ struct iris_bo {
      */
     uint64_t size;
  
-   /**
-    * Alignment requirement for object
-    *
-    * Used for GTT mapping & pinning the object.
-    */
-   uint64_t align;
-
     /** Buffer manager context associated with this buffer object */
     struct iris_bufmgr *bufmgr;
  
@@ -58,31 +104,11 @@ struct iris_bo {
     uint32_t gem_handle;
  
     /**
-    * Offset of the buffer inside the Graphics Translation Table.
+    * Virtual address of the buffer inside the PPGTT (Per-Process Graphics
+    * Translation Table).
      *
-    * This is effectively our GPU address for the buffer and we use it
-    * as our base for all state pointers into the buffer. However, since the
-    * kernel may be forced to move it around during the course of the
-    * buffer's lifetime, we can only know where the buffer was on the last
-    * execbuf. We presume, and are usually right, that the buffer will not
-    * move and so we use that last offset for the next batch and by doing
-    * so we can avoid having the kernel perform a relocation fixup pass as
-    * our pointers inside the batch will be using the correct base offset.
-    *
-    * Since we do use it as a base address for the next batch of pointers,
-    * the kernel treats our offset as a request, and if possible will
-    * arrange the buffer to placed at that address (trying to balance
-    * the cost of buffer migration versus the cost of performing
-    * relocations). Furthermore, we can force the kernel to place the buffer,
-    * or report a failure if we specified a conflicting offset, at our chosen
-    * offset by specifying EXEC_OBJECT_PINNED.
-    *
-    * Note the GTT may be either per context, or shared globally across the
-    * system. On a shared system, our buffers have to contend for address
-    * space with both aperture mappings and framebuffers and so are more
-    * likely to be moved. On a full ppGTT system, each batch exists in its
-    * own GTT, and so each buffer may have their own offset within each
-    * context.
+    * Although each hardware context has its own VMA, we assign BOs to the
+    * same address in all contexts, for simplicity.
      */
     uint64_t gtt_offset;
  
@@ -93,6 +119,10 @@ struct iris_bo {
      * It should not be considered authoritative, but can be used to avoid a
      * linear walk of the validation list in the common case by guessing that
      * exec_bos[bo->index] == bo and confirming whether that's the case.
+    *
+    * XXX: this is not ideal now that we have more than one batch per context,
+    * XXX: as the index will flop back and forth between the render index and
+    * XXX: compute index...
      */
     unsigned index;
  
@@ -150,10 +180,18 @@ struct iris_bo {
      * Boolean of whether this buffer is cache coherent
      */
     bool cache_coherent;
+
+   /**
+    * Boolean of whether this buffer points into user memory
+    */
+   bool userptr;
+
+   /** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
+   uint32_t hash;
  };
  
-#define BO_ALLOC_BUSY       (1<<0)
-#define BO_ALLOC_ZEROED     (1<<1)
+#define BO_ALLOC_ZEROED     (1<<0)
+#define BO_ALLOC_COHERENT   (1<<1)
  
  /**
   * Allocate a buffer object.
@@ -162,8 +200,10 @@ struct iris_bo {
   * address space or graphics device aperture.  They must be mapped
   * using iris_bo_map() to be used by the CPU.
   */
-struct iris_bo *iris_bo_alloc(struct iris_bufmgr *bufmgr, const char *name,
-                              uint64_t size, uint64_t alignment);
+struct iris_bo *iris_bo_alloc(struct iris_bufmgr *bufmgr,
+                              const char *name,
+                              uint64_t size,
+                              enum iris_memory_zone memzone);
  
  /**
   * Allocate a tiled buffer object.
@@ -179,10 +219,17 @@ struct iris_bo *iris_bo_alloc(struct iris_bufmgr *bufmgr, const char *name,
  struct iris_bo *iris_bo_alloc_tiled(struct iris_bufmgr *bufmgr,
                                      const char *name,
                                      uint64_t size,
+                                    uint32_t alignment,
+                                    enum iris_memory_zone memzone,
                                      uint32_t tiling_mode,
                                      uint32_t pitch,
                                      unsigned flags);
  
+struct iris_bo *
+iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
+                       void *ptr, size_t size,
+                       enum iris_memory_zone memzone);
+
  /** Takes a reference on a buffer object */
  static inline void
  iris_bo_reference(struct iris_bo *bo)
@@ -196,15 +243,18 @@ iris_bo_reference(struct iris_bo *bo)
   */
  void iris_bo_unreference(struct iris_bo *bo);
  
-#define MAP_READ          0x01
-#define MAP_WRITE         0x02
-#define MAP_ASYNC         0x20
-#define MAP_PERSISTENT    0x40
-#define MAP_COHERENT      0x80
+#define MAP_READ          PIPE_TRANSFER_READ
+#define MAP_WRITE         PIPE_TRANSFER_WRITE
+#define MAP_ASYNC         PIPE_TRANSFER_UNSYNCHRONIZED
+#define MAP_PERSISTENT    PIPE_TRANSFER_PERSISTENT
+#define MAP_COHERENT      PIPE_TRANSFER_COHERENT
  /* internal */
  #define MAP_INTERNAL_MASK (0xff << 24)
  #define MAP_RAW           (0x01 << 24)
  
+#define MAP_FLAGS         (MAP_READ | MAP_WRITE | MAP_ASYNC | \
+                           MAP_PERSISTENT | MAP_COHERENT | MAP_INTERNAL_MASK)
+
  /**
   * Maps the buffer into userspace.
   *
@@ -220,9 +270,6 @@ MUST_CHECK void *iris_bo_map(struct pipe_debug_callback *dbg,
   */
  static inline int iris_bo_unmap(struct iris_bo *bo) { return 0; }
  
-/** Write data into an object. */
-int iris_bo_subdata(struct iris_bo *bo, uint64_t offset,
-                   uint64_t size, const void *data);
  /**
   * Waits for rendering to an object by the GPU to have completed.
   *
@@ -285,6 +332,7 @@ void iris_bufmgr_enable_reuse(struct iris_bufmgr *bufmgr);
  int iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns);
  
  uint32_t iris_create_hw_context(struct iris_bufmgr *bufmgr);
+uint32_t iris_clone_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);
  
  #define IRIS_CONTEXT_LOW_PRIORITY    ((I915_CONTEXT_MIN_USER_PRIORITY-1)/2)
  #define IRIS_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
@@ -304,5 +352,23 @@ int iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *out);
  
  int drm_ioctl(int fd, unsigned long request, void *arg);
  
+/**
+ * Returns the BO's address relative to the appropriate base address.
+ *
+ * All of our base addresses are programmed to the start of a 4GB region,
+ * so simply returning the bottom 32 bits of the BO address will give us
+ * the offset from whatever base address corresponds to that memory region.
+ */
+static inline uint32_t
+iris_bo_offset_from_base_address(struct iris_bo *bo)
+{
+   /* This only works for buffers in the memory zones corresponding to a
+    * base address - the top, unbounded memory zone doesn't have a base.
+    */
+   assert(bo->gtt_offset < IRIS_MEMZONE_OTHER_START);
+   return (uint32_t)bo->gtt_offset; /* deliberate: keep only the low 32 bits */
+}
+
+enum iris_memory_zone iris_memzone_for_address(uint64_t address);
  
  #endif /* IRIS_BUFMGR_H */