freedreno/a6xx: add OUT_PKT()
author Rob Clark <robdclark@chromium.org>
Wed, 29 Apr 2020 16:58:38 +0000 (09:58 -0700)
committer Marge Bot <eric+marge@anholt.net>
Thu, 30 Apr 2020 20:03:17 +0000 (20:03 +0000)
Similar to OUT_REG(), this has the benefits of:

1. No more messing up pkt size
2. Detects errors of mixing up the order of dwords in the packet
3. Optimizes to more efficient code

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4813>

src/freedreno/registers/adreno_pm4.xml
src/freedreno/registers/meson.build
src/gallium/drivers/freedreno/a6xx/fd6_pack.h

index 3735b59992dbf618d2b135bb334d78541d044f67..fb2d8f4f639a2d75dadcf512f8f08b8af47a6dc6 100644 (file)
@@ -623,6 +623,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
        <reg32 offset="2" name="2">
                <bitfield name="EXT_SRC_ADDR_HI" low="0" high="31" shr="0"/>
        </reg32>
+       <reg64 offset="1" name="EXT_SRC_ADDR" type="address"/>
 </domain>
 
 <bitset name="vgt_draw_initiator" inline="yes">
@@ -705,13 +706,14 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
                <reg32 offset="5" name="5">
                        <bitfield name="INDX_BASE_HI" low="0" high="31"/>
                </reg32>
+               <reg64 offset="4" name="INDX_BASE" type="address"/>
                <reg32 offset="6" name="6">
                        <bitfield name="INDX_SIZE" low="0" high="31"/>
                </reg32>
        </stripe>
 
        <reg32 offset="4" name="4">
-               <bitfield name="INDX_BASE" low="0" high="31"/>
+               <bitfield name="INDX_BASE" low="0" high="31" type="address"/>
        </reg32>
 
        <reg32 offset="5" name="5">
@@ -721,13 +723,19 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
 
 <domain name="CP_DRAW_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-">
        <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/>
-       <reg32 offset="1" name="1">
-               <bitfield name="INDIRECT" low="0" high="31"/>
-       </reg32>
+       <strip variants="A4XX">
+               <reg32 offset="1" name="1">
+                       <bitfield name="INDIRECT" low="0" high="31"/>
+               </reg32>
+       </strip>
        <stripe variants="A5XX-">
+               <reg32 offset="1" name="1">
+                       <bitfield name="INDIRECT_LO" low="0" high="31"/>
+               </reg32>
                <reg32 offset="2" name="2">
                        <bitfield name="INDIRECT_HI" low="0" high="31"/>
                </reg32>
+               <reg64 offset="1" name="INDIRECT" type="address"/>
        </stripe>
 </domain>
 
@@ -752,6 +760,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
                <reg32 offset="2" name="2">
                        <bitfield name="INDX_BASE_HI" low="0" high="31"/>
                </reg32>
+               <reg64 offset="1" name="INDX_BASE" type="address"/>
                <reg32 offset="3" name="3">
                        <!-- max # of elements in index buffer -->
                        <bitfield name="MAX_INDICES" low="0" high="31" type="uint"/>
@@ -762,6 +771,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
                <reg32 offset="5" name="5">
                        <bitfield name="INDIRECT_HI" low="0" high="31"/>
                </reg32>
+               <reg64 offset="4" name="INDIRECT" type="address"/>
        </stripe>
 </domain>
 
index 33a9fce32028b9daf123f9b66cc944f179c6925c..26335ce608d3a9e8e23653d4d855faf08ab77979 100644 (file)
@@ -47,3 +47,10 @@ freedreno_xml_header_files += custom_target(
     command : [prog_python, '@INPUT@', '--pack-structs'],
     capture : true,
   )
+freedreno_xml_header_files += custom_target(  # gen_header.py --pack-structs output for adreno_pm4.xml
+    'adreno-pm4-pack.xml.h',
+    input : ['gen_header.py', 'adreno_pm4.xml'],
+    output : 'adreno-pm4-pack.xml.h',
+    command : [prog_python, '@INPUT@', '--pack-structs'],
+    capture : true,
+  )
index b3b9bf9244dfdbff1dedecbdcc0799be511deba2..f9063122bfc33f9cb6973acd23dbbc6bf8e1f253 100644 (file)
@@ -39,6 +39,7 @@ struct fd_reg_pair {
 #define __bo_type struct fd_bo *
 
 #include "a6xx-pack.xml.h"
+#include "adreno-pm4-pack.xml.h"
 
 #define __assert_eq(a, b)                                                                                                      \
        do {                                                                                                                            \
@@ -51,7 +52,8 @@ struct fd_reg_pair {
 #define __ONE_REG(i, ...)                                                                                      \
        do {                                                                                                                    \
                const struct fd_reg_pair regs[] = { __VA_ARGS__ };                      \
-               if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) {                          \
+               /* NOTE: allow regs[0].reg==0, this happens in OUT_PKT() */     \
+               if (i < ARRAY_SIZE(regs) && (i == 0 || regs[i].reg > 0)) {      \
                        __assert_eq(regs[0].reg + i, regs[i].reg);                              \
                        if (regs[i].bo) {                                                                               \
                                struct fd_reloc reloc = {                                                       \
@@ -109,4 +111,77 @@ struct fd_reg_pair {
                ring->cur = p;                                                                          \
        } while (0)
 
+#define OUT_PKT(ring, opcode, ...)                                                     \
+       do {                                                                                                    \
+               const struct fd_reg_pair regs[] = { __VA_ARGS__ };      \
+               unsigned count = ARRAY_SIZE(regs);                                      \
+                                                                                                                       \
+               STATIC_ASSERT(count <= 16);                                                     \
+                                                                                                                       \
+               BEGIN_RING(ring, count + 1);                                            \
+               uint32_t *p = ring->cur;                                                        \
+               *p++ = CP_TYPE7_PKT | count |                                           \
+                       (_odd_parity_bit(count) << 15) |                                \
+                       ((opcode & 0x7f) << 16) |                                               \
+                       ((_odd_parity_bit(opcode) << 23));                              \
+                                                                                                                       \
+               __ONE_REG( 0, __VA_ARGS__);                                                     \
+               __ONE_REG( 1, __VA_ARGS__);                                                     \
+               __ONE_REG( 2, __VA_ARGS__);                                                     \
+               __ONE_REG( 3, __VA_ARGS__);                                                     \
+               __ONE_REG( 4, __VA_ARGS__);                                                     \
+               __ONE_REG( 5, __VA_ARGS__);                                                     \
+               __ONE_REG( 6, __VA_ARGS__);                                                     \
+               __ONE_REG( 7, __VA_ARGS__);                                                     \
+               __ONE_REG( 8, __VA_ARGS__);                                                     \
+               __ONE_REG( 9, __VA_ARGS__);                                                     \
+               __ONE_REG(10, __VA_ARGS__);                                                     \
+               __ONE_REG(11, __VA_ARGS__);                                                     \
+               __ONE_REG(12, __VA_ARGS__);                                                     \
+               __ONE_REG(13, __VA_ARGS__);                                                     \
+               __ONE_REG(14, __VA_ARGS__);                                                     \
+               __ONE_REG(15, __VA_ARGS__);                                                     \
+               ring->cur = p;                                                                          \
+       } while (0)
+
+/* similar to OUT_PKT() but appends specified # of dwords
+ * copied for buf to the end of the packet (ie. for use-
+ * cases like CP_LOAD_STATE)
+ */
+#define OUT_PKTBUF(ring, opcode, dwords, sizedwords, ...)      \
+       do {                                                                                                    \
+               const struct fd_reg_pair regs[] = { __VA_ARGS__ };      \
+               unsigned count = ARRAY_SIZE(regs);                                      \
+                                                                                                                       \
+               STATIC_ASSERT(count <= 16);                                                     \
+               count += sizedwords;                                                            \
+                                                                                                                       \
+               BEGIN_RING(ring, count + 1);                                            \
+               uint32_t *p = ring->cur;                                                        \
+               *p++ = CP_TYPE7_PKT | count |                                           \
+                       (_odd_parity_bit(count) << 15) |                                \
+                       ((opcode & 0x7f) << 16) |                                               \
+                       ((_odd_parity_bit(opcode) << 23));                              \
+                                                                                                                       \
+               __ONE_REG( 0, __VA_ARGS__);                                                     \
+               __ONE_REG( 1, __VA_ARGS__);                                                     \
+               __ONE_REG( 2, __VA_ARGS__);                                                     \
+               __ONE_REG( 3, __VA_ARGS__);                                                     \
+               __ONE_REG( 4, __VA_ARGS__);                                                     \
+               __ONE_REG( 5, __VA_ARGS__);                                                     \
+               __ONE_REG( 6, __VA_ARGS__);                                                     \
+               __ONE_REG( 7, __VA_ARGS__);                                                     \
+               __ONE_REG( 8, __VA_ARGS__);                                                     \
+               __ONE_REG( 9, __VA_ARGS__);                                                     \
+               __ONE_REG(10, __VA_ARGS__);                                                     \
+               __ONE_REG(11, __VA_ARGS__);                                                     \
+               __ONE_REG(12, __VA_ARGS__);                                                     \
+               __ONE_REG(13, __VA_ARGS__);                                                     \
+               __ONE_REG(14, __VA_ARGS__);                                                     \
+               __ONE_REG(15, __VA_ARGS__);                                                     \
+               memcpy(p, dwords, 4 * sizedwords);                                      \
+               p += sizedwords;                                                                        \
+               ring->cur = p;                                                                          \
+       } while (0)
+
 #endif /* FD6_PACK_H */