turnip: Port krh's packing macros from freedreno to tu.
[mesa.git] / src / freedreno / vulkan / tu_cs.h
index cfac00f200dd214a06cce2760df76eee178a70b7..e61b82e3c7d41c98dff6a053e194123e7a874947 100644 (file)
 #include "registers/adreno_pm4.xml.h"
 
 void
-tu_cs_init(struct tu_cs *cs, uint32_t initial_size);
+tu_cs_init(struct tu_cs *cs, enum tu_cs_mode mode, uint32_t initial_size);
+
+void
+tu_cs_init_external(struct tu_cs *cs, uint32_t *start, uint32_t *end);
 
 void
 tu_cs_finish(struct tu_device *dev, struct tu_cs *cs);
@@ -39,6 +42,22 @@ tu_cs_begin(struct tu_cs *cs);
 void
 tu_cs_end(struct tu_cs *cs);
 
+VkResult
+tu_cs_begin_sub_stream(struct tu_device *dev,
+                       struct tu_cs *cs,
+                       uint32_t size,
+                       struct tu_cs *sub_cs);
+
+/*
+ * NOTE(review): the tag `ts_cs_memory` looks like a typo for `tu_cs_memory`
+ * (every other type in this header uses the `tu_` prefix).  Confirm against
+ * the struct's definition before renaming: a tag that is not declared before
+ * this point would silently introduce a new incomplete type scoped to this
+ * prototype.
+ */
+VkResult
+tu_cs_alloc(struct tu_device *dev,
+            struct tu_cs *cs,
+            uint32_t count,
+            uint32_t size,
+            struct ts_cs_memory *memory);
+
+struct tu_cs_entry
+tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs);
+
 VkResult
 tu_cs_reserve_space(struct tu_device *dev,
                     struct tu_cs *cs,
@@ -47,12 +66,27 @@ tu_cs_reserve_space(struct tu_device *dev,
 void
 tu_cs_reset(struct tu_device *dev, struct tu_cs *cs);
 
+VkResult
+tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target);
+
+/**
+ * Discard all entries.  This allows \a cs to be reused while keeping the
+ * existing BOs and command packets intact.
+ *
+ * Only the entry count is reset; no other field of \a cs is modified here.
+ */
+static inline void
+tu_cs_discard_entries(struct tu_cs *cs)
+{
+   /* callers must only use this on a grow-mode stream */
+   assert(cs->mode == TU_CS_MODE_GROW);
+   cs->entry_count = 0;
+}
+
 /**
  * Get the size needed for tu_cs_emit_call.
+ *
+ * The result is in dwords: the entry count of \a cs times the 4 dwords of one
+ * CP_INDIRECT_BUFFER packet.
  */
 static inline uint32_t
 tu_cs_get_call_size(const struct tu_cs *cs)
 {
+   assert(cs->mode == TU_CS_MODE_GROW);
    /* each CP_INDIRECT_BUFFER needs 4 dwords */
    return cs->entry_count * 4;
 }
@@ -74,11 +108,22 @@ tu_cs_sanity_check(const struct tu_cs *cs)
 static inline void
 tu_cs_emit(struct tu_cs *cs, uint32_t value)
 {
-   assert(cs->cur < cs->end);
+   /* debug-only check against the reserved region; nothing is checked when
+    * assert() is compiled out */
+   assert(cs->cur < cs->reserved_end);
    *cs->cur = value;
    ++cs->cur;
 }
 
+/**
+ * Emit an array of uint32_t into a command stream, without boundary checking
+ * beyond a debug-build assert.
+ *
+ * \a length is a count of uint32_t elements (not bytes).  The values are
+ * copied to the current write position, which then advances by \a length.
+ */
+static inline void
+tu_cs_emit_array(struct tu_cs *cs, const uint32_t *values, uint32_t length)
+{
+   /* the whole array must fit in the reserved region */
+   assert(cs->cur + length <= cs->reserved_end);
+   memcpy(cs->cur, values, sizeof(uint32_t) * length);
+   cs->cur += length;
+}
+
 static inline unsigned
 tu_odd_parity_bit(unsigned val)
 {
@@ -140,8 +185,10 @@ tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value)
 static inline void
 tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry)
 {
-   assert(entry->offset % sizeof(uint32_t) == 0);
+   assert(entry->bo);
+   assert(entry->size && entry->offset + entry->size <= entry->bo->size);
    assert(entry->size % sizeof(uint32_t) == 0);
+   assert(entry->offset % sizeof(uint32_t) == 0);
 
    tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
    tu_cs_emit_qw(cs, entry->bo->iova + entry->offset);
@@ -155,8 +202,87 @@ tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry)
 static inline void
 tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target)
 {
+   assert(target->mode == TU_CS_MODE_GROW);
+   /* one tu_cs_emit_ib() per entry recorded in target, in order */
    for (uint32_t i = 0; i < target->entry_count; i++)
       tu_cs_emit_ib(cs, target->entries + i);
 }
 
+/* Name aliases for the packing code below: tu_cs_emit_regs() declares its
+ * register array as `struct fd_reg_pair`, which this maps onto tu_reg_value.
+ * NOTE(review): __bo_type is presumably consumed by a6xx-pack.xml.h to type
+ * its BO field -- confirm against the generated header.
+ */
+#define fd_reg_pair tu_reg_value
+#define __bo_type struct tu_bo *
+
+#include "a6xx.xml.h"
+#include "a6xx-pack.xml.h"
+
+/* Equality check that prints both operands (as hex, via %x) to stderr before
+ * tripping assert().  Nothing is printed when the values match.
+ *
+ * The fprintf() is not part of the assert() expression, so the message is
+ * still printed on a mismatch when assert() is compiled out.  Both arguments
+ * are expanded more than once and must be free of side effects.
+ */
+#define __assert_eq(a, b)                                               \
+   do {                                                                 \
+      if ((a) != (b)) {                                                 \
+         fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", a, b); \
+         assert((a) == (b));                                            \
+      }                                                                 \
+   } while (0)
+
+/* Emit the payload for element \a i of \a regs through the caller's `p`
+ * write pointer (the macro relies on a `p` being in scope and advances it).
+ *
+ * Elements at or past ARRAY_SIZE(regs), and elements whose .reg is 0, emit
+ * nothing.  Otherwise the element must sit at register regs[0].reg + i
+ * (checked with __assert_eq) and is written as:
+ *  - with a BO: two dwords, low then high, of
+ *    ((bo->iova + bo_offset) >> bo_shift) | value;
+ *  - without a BO: value, followed by value >> 32 when .is_address is set.
+ */
+#define __ONE_REG(i, regs)                                      \
+   do {                                                         \
+      if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) {            \
+         __assert_eq(regs[0].reg + i, regs[i].reg);             \
+         if (regs[i].bo) {                                      \
+            uint64_t v = regs[i].bo->iova + regs[i].bo_offset;  \
+            v >>= regs[i].bo_shift;                             \
+            v |= regs[i].value;                                 \
+                                                                \
+            *p++ = v;                                           \
+            *p++ = v >> 32;                                     \
+         } else {                                               \
+            *p++ = regs[i].value;                               \
+            if (regs[i].is_address)                             \
+               *p++ = regs[i].value >> 32;                      \
+         }                                                      \
+      }                                                         \
+   } while (0)
+
+/* Emits a sequence of register writes in order using a pkt4.  This will check
+ * (at runtime on a !NDEBUG build) that the registers were actually set up in
+ * order in the code.
+ *
+ * Note that references to buffers aren't automatically added to the CS,
+ * unlike in freedreno.  We are clever in various places to avoid duplicating
+ * the reference add work.
+ *
+ * Also, 64-bit address registers don't have a way (currently) to set a 64-bit
+ * address without having a reference to a BO, since the .dword field in the
+ * register's struct is only 32-bit wide.  We should fix this in the pack
+ * codegen later.
+ *
+ * Between 1 and 16 register entries are accepted.  The limits are asserted
+ * on ARRAY_SIZE(regs) itself, which is a constant expression, rather than on
+ * a copy held in a variable, so the check can be evaluated at compile time.
+ *
+ * This macro does not compare the write pointer against the reserved region;
+ * presumably the caller must have reserved enough space beforehand (confirm
+ * against the call sites).  \a cs is expanded twice and must not have side
+ * effects; it is parenthesized so that arguments such as &foo->cs work.
+ */
+#define tu_cs_emit_regs(cs, ...) do {                   \
+   const struct fd_reg_pair regs[] = { __VA_ARGS__ };   \
+   unsigned count = ARRAY_SIZE(regs);                   \
+                                                        \
+   STATIC_ASSERT(ARRAY_SIZE(regs) > 0);                 \
+   STATIC_ASSERT(ARRAY_SIZE(regs) <= 16);               \
+                                                        \
+   uint32_t *p = (cs)->cur;                             \
+   *p++ = CP_TYPE4_PKT | count |                        \
+      (tu_odd_parity_bit(count) << 7) |                 \
+      ((regs[0].reg & 0x3ffff) << 8) |                  \
+      ((tu_odd_parity_bit(regs[0].reg) << 27));         \
+                                                        \
+   __ONE_REG( 0, regs);                                 \
+   __ONE_REG( 1, regs);                                 \
+   __ONE_REG( 2, regs);                                 \
+   __ONE_REG( 3, regs);                                 \
+   __ONE_REG( 4, regs);                                 \
+   __ONE_REG( 5, regs);                                 \
+   __ONE_REG( 6, regs);                                 \
+   __ONE_REG( 7, regs);                                 \
+   __ONE_REG( 8, regs);                                 \
+   __ONE_REG( 9, regs);                                 \
+   __ONE_REG(10, regs);                                 \
+   __ONE_REG(11, regs);                                 \
+   __ONE_REG(12, regs);                                 \
+   __ONE_REG(13, regs);                                 \
+   __ONE_REG(14, regs);                                 \
+   __ONE_REG(15, regs);                                 \
+   (cs)->cur = p;                                       \
+   } while (0)
+
 #endif /* TU_CS_H */