turnip: Port krh's packing macros from freedreno to tu.
author Eric Anholt <eric@anholt.net>
Thu, 16 Jan 2020 23:38:37 +0000 (15:38 -0800)
committer Marge Bot <eric+marge@anholt.net>
Thu, 23 Jan 2020 22:46:09 +0000 (22:46 +0000)
Code that previously packed a whole register with a single macro call now
has to unpack the temporary fd_reg_pair struct by reading its .value member.

In the pack wrapper in tu_cs.h, I added some explanatory docs, dropped the
relocs handling since we don't need it, and removed the extra regs[] in
the __ONE_REG() macro (which was causing gcc's optimizer to fall on its
face in my release build).

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3455>

src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_cs.h
src/freedreno/vulkan/tu_pipeline.c
src/freedreno/vulkan/tu_private.h

index e8098dd72e19fdf30b1427d713f077d56dfbf854..0158cca09127172db67cafb265339b14ef3f7839 100644 (file)
@@ -29,7 +29,6 @@
 
 #include "registers/adreno_pm4.xml.h"
 #include "registers/adreno_common.xml.h"
-#include "registers/a6xx.xml.h"
 
 #include "vk_format.h"
 
@@ -405,8 +404,8 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
 
    tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
    tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
-   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip)));
-   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layout.layer_size));
+   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip)).value);
+   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layout.layer_size).value);
    tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
    tu_cs_emit(cs, cmd->state.pass->attachments[a].gmem_offset);
 
@@ -460,8 +459,8 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd,
       tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) |
                         A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
                         A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap));
-      tu_cs_emit(cs, A6XX_RB_MRT_PITCH(tu_image_stride(iview->image, iview->base_mip)));
-      tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(iview->image->layout.layer_size));
+      tu_cs_emit(cs, A6XX_RB_MRT_PITCH(i, tu_image_stride(iview->image, iview->base_mip)).value);
+      tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(i, iview->image->layout.layer_size).value);
       tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
       tu_cs_emit(cs, cmd->state.pass->attachments[a].gmem_offset);
 
@@ -612,8 +611,8 @@ tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
                      COND(iview->image->layout.ubwc_size,
                           A6XX_RB_BLIT_DST_INFO_FLAGS));
    tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
-   tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)));
-   tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layout.layer_size));
+   tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)).value);
+   tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layout.layer_size).value);
 
    if (iview->image->layout.ubwc_size) {
       tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
index e5c47d005bead3e102e8cc61bc1528aedd273ece..e61b82e3c7d41c98dff6a053e194123e7a874947 100644 (file)
@@ -207,4 +207,82 @@ tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target)
       tu_cs_emit_ib(cs, target->entries + i);
 }
 
+#define fd_reg_pair tu_reg_value
+#define __bo_type struct tu_bo *
+
+#include "a6xx.xml.h"
+#include "a6xx-pack.xml.h"
+
+#define __assert_eq(a, b)                                               \
+   do {                                                                 \
+      if ((a) != (b)) {                                                 \
+         fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", a, b); \
+         assert((a) == (b));                                            \
+      }                                                                 \
+   } while (0)
+
+#define __ONE_REG(i, regs)                                      \
+   do {                                                         \
+      if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) {            \
+         __assert_eq(regs[0].reg + i, regs[i].reg);             \
+         if (regs[i].bo) {                                      \
+            uint64_t v = regs[i].bo->iova + regs[i].bo_offset;  \
+            v >>= regs[i].bo_shift;                             \
+            v |= regs[i].value;                                 \
+                                                                \
+            *p++ = v;                                           \
+            *p++ = v >> 32;                                     \
+         } else {                                               \
+            *p++ = regs[i].value;                               \
+            if (regs[i].is_address)                             \
+               *p++ = regs[i].value >> 32;                      \
+         }                                                      \
+      }                                                         \
+   } while (0)
+
+/* Emits a sequence of register writes in order using a pkt4.  This will check
+ * (at runtime on a !NDEBUG build) that the registers were actually set up in
+ * order in the code.
+ *
+ * Note that references to buffers aren't automatically added to the CS,
+ * unlike in freedreno.  We are clever in various places to avoid duplicating
+ * the reference add work.
+ *
+ * Also, 64-bit address registers don't have a way (currently) to set a 64-bit
+ * address without having a reference to a BO, since the .dword field in the
+ * register's struct is only 32-bit wide.  We should fix this in the pack
+ * codegen later.
+ */
+#define tu_cs_emit_regs(cs, ...) do {                   \
+   const struct fd_reg_pair regs[] = { __VA_ARGS__ };   \
+   unsigned count = ARRAY_SIZE(regs);                   \
+   /* 'count' is not a constant; assert on the size. */ \
+   STATIC_ASSERT(ARRAY_SIZE(regs) > 0);                 \
+   STATIC_ASSERT(ARRAY_SIZE(regs) <= 16);               \
+   /* Header dword: count, first register, parities. */ \
+   uint32_t *p = (cs)->cur;                             \
+   *p++ = CP_TYPE4_PKT | count |                        \
+      (tu_odd_parity_bit(count) << 7) |                 \
+      ((regs[0].reg & 0x3ffff) << 8) |                  \
+      ((tu_odd_parity_bit(regs[0].reg) << 27));         \
+   /* Entries past the end of regs[] emit nothing. */   \
+   __ONE_REG( 0, regs);                                 \
+   __ONE_REG( 1, regs);                                 \
+   __ONE_REG( 2, regs);                                 \
+   __ONE_REG( 3, regs);                                 \
+   __ONE_REG( 4, regs);                                 \
+   __ONE_REG( 5, regs);                                 \
+   __ONE_REG( 6, regs);                                 \
+   __ONE_REG( 7, regs);                                 \
+   __ONE_REG( 8, regs);                                 \
+   __ONE_REG( 9, regs);                                 \
+   __ONE_REG(10, regs);                                 \
+   __ONE_REG(11, regs);                                 \
+   __ONE_REG(12, regs);                                 \
+   __ONE_REG(13, regs);                                 \
+   __ONE_REG(14, regs);                                 \
+   __ONE_REG(15, regs);                                 \
+   (cs)->cur = p;                                       \
+   } while (0)
+
 #endif /* TU_CS_H */
index c6624dd493209ebaa4f068a3d94a154ce1a8c0d3..8fe9b974db25085f0ab5b5c3a0039c770792f1a4 100644 (file)
@@ -1185,12 +1185,12 @@ tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport)
    guardband_adj.height = tu6_guardband_adj(max.y - min.y);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6);
-   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XOFFSET_0(offsets[0]));
-   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XSCALE_0(scales[0]));
-   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YOFFSET_0(offsets[1]));
-   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YSCALE_0(scales[1]));
-   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZOFFSET_0(offsets[2]));
-   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZSCALE_0(scales[2]));
+   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XOFFSET_0(offsets[0]).value);
+   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XSCALE_0(scales[0]).value);
+   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YOFFSET_0(offsets[1]).value);
+   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YSCALE_0(scales[1]).value);
+   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZOFFSET_0(offsets[2]).value);
+   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZSCALE_0(scales[2]).value);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2);
    tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(min.x) |
@@ -1237,7 +1237,7 @@ tu6_emit_point_size(struct tu_cs *cs)
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POINT_MINMAX, 2);
    tu_cs_emit(cs, A6XX_GRAS_SU_POINT_MINMAX_MIN(1.0f / 16.0f) |
                      A6XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f));
-   tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f));
+   tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f).value);
 }
 
 static uint32_t
@@ -1284,9 +1284,9 @@ tu6_emit_depth_bias(struct tu_cs *cs,
                     float slope_factor)
 {
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
-   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor));
-   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor));
-   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp));
+   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor).value);
+   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor).value);
+   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp).value);
 }
 
 static void
index 79de5dc82de0df2626cd4190a2e8a9ba1b25ae29..43f676dc7a42fa2e1ddf18d9079317e0b653cbeb 100644 (file)
@@ -955,6 +955,19 @@ struct tu_cmd_buffer
    bool wait_for_idle;
 };
 
+/* Temporary struct for tracking a register state to be written, used by
+ * a6xx-pack.xml.h and tu_cs_emit_regs()
+ */
+struct tu_reg_value {
+   uint32_t reg;
+   uint64_t value;
+   bool is_address;
+   struct tu_bo *bo;
+   bool bo_write;
+   uint32_t bo_offset;
+   uint32_t bo_shift;
+};
+
 unsigned
 tu6_emit_event_write(struct tu_cmd_buffer *cmd,
                      struct tu_cs *cs,