a6xx: Add more CP packets
author Connor Abbott <cwabbott0@gmail.com>
Mon, 16 Dec 2019 16:17:38 +0000 (17:17 +0100)
committer Connor Abbott <cwabbott0@gmail.com>
Wed, 18 Dec 2019 22:08:55 +0000 (23:08 +0100)
Also add fields uncovered by looking at the firmware. I think this covers
all the memory, register, and scratch manipulation opcodes that exist on
A6xx. In addition, it adds one nice find for Vulkan
(CP_SET_BIN_DATA5_OFFSET), describes a previously unknown opcode
(CP_WAIT_MEM_GTE, formerly CP_UNK_A6XX_14), and documents CP_WAIT_REG_MEM.

Note that the bits for the CP_REG_TO_MEM count, as well as the formula
for computing the actual count for both CP_REG_TO_MEM and CP_MEM_TO_REG,
are changed because the A630 SQE firmware actually does something
different. I haven't investigated older microcodes to see whether this
extends back to A5xx and A4xx, but the only non-A6xx uses of this
field result in the same bit-pattern when using the A6xx bit range and
formula, so it should be safe to change the definition universally.

Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Reviewed-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3116>

src/freedreno/registers/adreno_pm4.xml
src/freedreno/vulkan/tu_cmd_buffer.c
src/gallium/drivers/freedreno/a4xx/fd4_query.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.h
src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
src/gallium/drivers/freedreno/a6xx/fd6_query.c

index 3a7865b489d4fe6317afff263409f49c12fd766b..533dcf0fb38e6cfb2432fc65df7ae00b021e8712 100644 (file)
@@ -219,7 +219,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
        <doc>load sequencer instruction memory (code embedded in packet)</doc>
        <value name="CP_IM_LOAD_IMMEDIATE" value="0x2b"/>
        <doc>load constants from a location in memory</doc>
-       <value name="CP_LOAD_CONSTANT_CONTEXT" value="0x2e"/>
+       <value name="CP_LOAD_CONSTANT_CONTEXT" value="0x2e" variants="A2XX"/>
        <doc>selective invalidation of state pointers</doc>
        <value name="CP_INVALIDATE_STATE" value="0x3b"/>
        <doc>dynamically changes shader instruction memory partition</doc>
@@ -266,7 +266,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
        <doc>Load a buffer with pre-fetch enabled</doc>
        <value name="CP_INDIRECT_BUFFER_PFE" value="0x3f" variants="A5XX"/>
        <doc>Set bin (?)</doc>
-       <value name="CP_SET_BIN" value="0x4c"/>
+       <value name="CP_SET_BIN" value="0x4c" variants="A2XX"/>
 
        <doc>test 2 memory locations to dword values specified</doc>
        <value name="CP_TEST_TWO_MEMS" value="0x71"/>
@@ -310,7 +310,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
                for A4xx
                Write to register with address that does not fit into type-0 pkt
        </doc>
-       <value name="CP_WIDE_REG_WRITE" value="0x74"/>
+       <value name="CP_WIDE_REG_WRITE" value="0x74" variants="A4XX"/>
 
        <doc>copy from ME scratch RAM to a register</doc>
        <value name="CP_SCRATCH_TO_REG" value="0x4d"/>
@@ -413,6 +413,15 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
 
        <!-- jmptable entry used to handle type4 packet on a5xx+: -->
        <value name="PKT4" value="0x04" variants="A5XX,A6XX"/>
+
+       <!-- TODO do these exist on A5xx? -->
+       <value name="CP_SCRATCH_WRITE" value="0x4c" variants="A6XX"/>
+       <value name="CP_REG_TO_MEM_OFFSET_MEM" value="0x74" variants="A6XX"/>
+       <value name="CP_REG_TO_MEM_OFFSET_REG" value="0x72" variants="A6XX"/>
+       <value name="CP_WAIT_MEM_GTE" value="0x14" variants="A6XX"/>
+       <value name="CP_WAIT_TWO_REGS" value="0x70" variants="A6XX"/>
+       <value name="CP_MEMCPY" value="0x75" variants="A6XX"/>
+       <value name="CP_SET_BIN_DATA5_OFFSET" value="0x2e" variants="A6XX"/>
 <!--
 unknown a6xx opcodes:
 
@@ -420,7 +429,6 @@ opcode: (null) (14) (5 dwords)
 opcode: (null) (55) (4 dwords)
 opcode: (null) (6d) (4 dwords)
  -->
-       <value name="CP_UNK_A6XX_14" value="0x14" variants="A6XX"/>
        <value name="CP_UNK_A6XX_55" value="0x55" variants="A6XX"/>
 
        <!--
@@ -830,14 +838,66 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
        </reg32>
 </domain>
 
+<domain name="CP_SET_BIN_DATA5_OFFSET" width="32">
+       <doc>
+                Like CP_SET_BIN_DATA5, but set the pointers as offsets from the
+                pointers stored in VSC_PIPE_{DATA,DATA2,SIZE}_ADDRESS. Useful
+                for Vulkan where these values aren't known when the command
+                stream is recorded.
+       </doc>
+       <reg32 offset="0" name="0">
+               <!-- equiv to PC_VSTREAM_CONTROL.SIZE on a3xx/a4xx: -->
+               <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/>
+               <!-- equiv to PC_VSTREAM_CONTROL.N on a3xx/a4xx: -->
+               <bitfield name="VSC_N" low="22" high="26" type="uint"/>
+       </reg32>
+       <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS -->
+       <reg32 offset="1" name="1">
+               <bitfield name="BIN_DATA_OFFSET" low="0" high="31" type="uint"/>
+       </reg32>
+       <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)-->
+       <reg32 offset="2" name="2">
+               <bitfield name="BIN_SIZE_OFFSET" low="0" high="31" type="uint"/>
+       </reg32>
+       <!-- BIN_DATA2_ADDR -> VSC_PIPE[p].DATA2_ADDRESS -->
+       <reg32 offset="3" name="3">
+               <bitfield name="BIN_DATA2_OFFSET" low="0" high="31" type="uint"/>
+       </reg32>
+</domain>
+
+<domain name="CP_REG_RMW" width="32">
+       <doc>
+                Modifies DST_REG using two sources that can either be registers
+                or immediates. If SRC1_ADD is set, then do the following:
+
+                       $dst = (($dst &amp; $src0) rot $rotate) + $src1
+
+               Otherwise:
+
+                       $dst = (($dst &amp; $src0) rot $rotate) | $src1
+
+               Here "rot" means rotate left.
+       </doc>
+       <reg32 offset="0" name="0">
+               <bitfield name="DST_REG" low="0" high="17" type="hex"/>
+               <bitfield name="ROTATE" low="24" high="28" type="uint"/>
+               <bitfield name="SRC1_ADD" pos="29" type="boolean"/>
+               <bitfield name="SRC1_IS_REG" pos="30" type="boolean"/>
+               <bitfield name="SRC0_IS_REG" pos="31" type="boolean"/>
+       </reg32>
+       <reg32 offset="1" name="1">
+               <bitfield name="SRC0" low="0" high="31" type="uint"/>
+       </reg32>
+       <reg32 offset="2" name="2">
+               <bitfield name="SRC1" low="0" high="31" type="uint"/>
+       </reg32>
+</domain>
+
 <domain name="CP_REG_TO_MEM" width="32">
        <reg32 offset="0" name="0">
                <bitfield name="REG" low="0" high="15" type="hex"/>
-               <!--
-               number of regsiters/dwords copied is CNT+1.. unsure
-               about # of bits
-                -->
-               <bitfield name="CNT" low="19" high="29" type="uint"/>
+               <!-- number of registers/dwords copied is max(CNT, 1). -->
+               <bitfield name="CNT" low="18" high="29" type="uint"/>
                <bitfield name="64B" pos="30" type="boolean"/>
                <bitfield name="ACCUMULATE" pos="31" type="boolean"/>
        </reg32>
@@ -849,13 +909,62 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
        </reg32>
 </domain>
 
+<domain name="CP_REG_TO_MEM_OFFSET_REG" width="32">
+       <doc>
+                Like CP_REG_TO_MEM, but the memory address to write to can be
+                offset using either one or two registers or scratch
+                registers.
+       </doc>
+       <reg32 offset="0" name="0">
+               <bitfield name="REG" low="0" high="15" type="hex"/>
+               <!-- number of registers/dwords copied is max(CNT, 1). -->
+               <bitfield name="CNT" low="18" high="29" type="uint"/>
+               <bitfield name="64B" pos="30" type="boolean"/>
+               <bitfield name="ACCUMULATE" pos="31" type="boolean"/>
+       </reg32>
+       <reg32 offset="1" name="1">
+               <bitfield name="DEST" low="0" high="31"/>
+       </reg32>
+       <reg32 offset="2" name="2" variants="A5XX-">
+               <bitfield name="DEST_HI" low="0" high="31"/>
+       </reg32>
+       <reg32 offset="3" name="3">
+               <bitfield name="OFFSET0" low="0" high="17" type="hex"/>
+               <bitfield name="OFFSET0_SCRATCH" pos="19" type="boolean"/>
+       </reg32>
+       <!-- followed by an optional identical OFFSET1 dword -->
+</domain>
+
+<domain name="CP_REG_TO_MEM_OFFSET_MEM" width="32">
+       <doc>
+                Like CP_REG_TO_MEM, but the memory address to write to can be
+                offset using a DWORD in memory.
+       </doc>
+       <reg32 offset="0" name="0">
+               <bitfield name="REG" low="0" high="15" type="hex"/>
+               <!-- number of registers/dwords copied is max(CNT, 1). -->
+               <bitfield name="CNT" low="18" high="29" type="uint"/>
+               <bitfield name="64B" pos="30" type="boolean"/>
+               <bitfield name="ACCUMULATE" pos="31" type="boolean"/>
+       </reg32>
+       <reg32 offset="1" name="1">
+               <bitfield name="DEST" low="0" high="31"/>
+       </reg32>
+       <reg32 offset="2" name="2" variants="A5XX-">
+               <bitfield name="DEST_HI" low="0" high="31"/>
+       </reg32>
+       <reg32 offset="3" name="3">
+               <bitfield name="OFFSET_LO" low="0" high="31" type="hex"/>
+       </reg32>
+       <reg32 offset="4" name="4">
+               <bitfield name="OFFSET_HI" low="0" high="31" type="hex"/>
+       </reg32>
+</domain>
+
 <domain name="CP_MEM_TO_REG" width="32">
        <reg32 offset="0" name="0">
                <bitfield name="REG" low="0" high="15" type="hex"/>
-               <!--
-               number of regsiters/dwords copied is CNT+1.. unsure
-               about # of bits
-                -->
+               <!-- number of registers/dwords copied is max(CNT, 1). -->
                <bitfield name="CNT" low="19" high="29" type="uint"/>
                <bitfield name="64B" pos="30" type="boolean"/>
                <bitfield name="ACCUMULATE" pos="31" type="boolean"/>
@@ -880,6 +989,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
 
                <!-- if set treat src/dst as 64bit values -->
                <bitfield name="DOUBLE" pos="29" type="boolean"/>
+               <!-- execute CP_WAIT_FOR_MEM_WRITES beforehand -->
+               <bitfield name="WAIT_FOR_MEM_WRITES" pos="30" type="boolean"/>
+               <!-- some other kind of wait -->
+               <bitfield name="UNK31" pos="31" type="boolean"/>
        </reg32>
        <!--
        followed by sequence of addresses.. the first is the
@@ -891,6 +1004,61 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
         -->
 </domain>
 
+<domain name="CP_MEMCPY" width="32">
+       <reg32 offset="0" name="0">
+               <bitfield name="DWORDS" low="0" high="31" type="uint"/>
+       </reg32>
+       <reg32 offset="1" name="1">
+               <bitfield name="SRC_LO" low="0" high="31" type="hex"/>
+       </reg32>
+       <reg32 offset="2" name="2">
+               <bitfield name="SRC_HI" low="0" high="31" type="hex"/>
+       </reg32>
+       <reg32 offset="3" name="3">
+               <bitfield name="DST_LO" low="0" high="31" type="hex"/>
+       </reg32>
+       <reg32 offset="4" name="4">
+               <bitfield name="DST_HI" low="0" high="31" type="hex"/>
+       </reg32>
+</domain>
+
+<domain name="CP_REG_TO_SCRATCH" width="32">
+       <reg32 offset="0" name="0">
+               <bitfield name="REG" low="0" high="17" type="hex"/>
+               <bitfield name="SCRATCH" low="20" high="22" type="uint"/>
+               <!-- number of registers/dwords copied is CNT + 1. -->
+               <bitfield name="CNT" low="24" high="26" type="uint"/>
+       </reg32>
+</domain>
+
+<domain name="CP_SCRATCH_TO_REG" width="32">
+       <reg32 offset="0" name="0">
+               <bitfield name="REG" low="0" high="17" type="hex"/>
+               <!-- note: CP_MEM_TO_REG always sets this when writing to the register -->
+               <bitfield name="UNK18" pos="18" type="boolean"/>
+               <bitfield name="SCRATCH" low="20" high="22" type="uint"/>
+               <!-- number of registers/dwords copied is CNT + 1. -->
+               <bitfield name="CNT" low="24" high="26" type="uint"/>
+       </reg32>
+</domain>
+
+<domain name="CP_SCRATCH_WRITE" width="32">
+       <reg32 offset="0" name="0">
+               <bitfield name="SCRATCH" low="20" high="22" type="uint"/>
+       </reg32>
+       <!-- followed by one or more DWORDs to write to scratch registers -->
+</domain>
+
+<domain name="CP_MEM_WRITE" width="32">
+       <reg32 offset="0" name="0">
+               <bitfield name="ADDR_LO" low="0" high="31"/>
+       </reg32>
+       <reg32 offset="1" name="1">
+               <bitfield name="ADDR_HI" low="0" high="31"/>
+       </reg32>
+       <!-- followed by the DWORDs to write -->
+</domain>
+
 <enum name="cp_cond_function">
        <value value="0" name="WRITE_ALWAYS"/>
        <value value="1" name="WRITE_LT"/>
@@ -927,7 +1095,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
 <domain name="CP_COND_WRITE5" width="32">
        <reg32 offset="0" name="0">
                <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/>
+               <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/>
+                <!-- if both POLL_MEMORY and POLL_SCRATCH are false, it polls a register at POLL_ADDR_LO instead. -->
                <bitfield name="POLL_MEMORY" pos="4" type="boolean"/>
+               <bitfield name="POLL_SCRATCH" pos="5" type="boolean"/>
                <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/>
        </reg32>
        <reg32 offset="1" name="1">
@@ -953,6 +1124,71 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
        </reg32>
 </domain>
 
+<domain name="CP_WAIT_MEM_GTE" width="32">
+        <doc>
+                Wait until a memory value is greater than or equal to the
+                reference, using signed comparison.
+       </doc>
+       <reg32 offset="0" name="0">
+               <!-- Reserved for flags, presumably? Unused in FW -->
+               <bitfield name="RESERVED" low="0" high="31" type="hex"/>
+       </reg32>
+       <reg32 offset="1" name="1">
+               <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/>
+       </reg32>
+       <reg32 offset="2" name="2">
+               <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/>
+       </reg32>
+       <reg32 offset="3" name="3">
+               <bitfield name="REF" low="0" high="31"/>
+       </reg32>
+</domain>
+
+<domain name="CP_WAIT_REG_MEM" width="32">
+        <doc>
+                This uses the same internal comparison as CP_COND_WRITE,
+                but waits until the comparison is true instead. It busy-loops in
+                the CP for the given number of cycles before trying again.
+       </doc>
+       <reg32 offset="0" name="0">
+               <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/>
+               <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/>
+               <bitfield name="POLL_MEMORY" pos="4" type="boolean"/>
+               <bitfield name="POLL_SCRATCH" pos="5" type="boolean"/>
+               <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/>
+       </reg32>
+       <reg32 offset="1" name="1">
+               <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/>
+       </reg32>
+       <reg32 offset="2" name="2">
+               <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/>
+       </reg32>
+       <reg32 offset="3" name="3">
+               <bitfield name="REF" low="0" high="31"/>
+       </reg32>
+       <reg32 offset="4" name="4">
+               <bitfield name="MASK" low="0" high="31"/>
+       </reg32>
+       <reg32 offset="5" name="5">
+               <bitfield name="DELAY_LOOP_CYCLES" low="0" high="31"/>
+       </reg32>
+</domain>
+
+<domain name="CP_WAIT_TWO_REGS" width="32">
+       <doc>
+               Waits for REG0 to not be 0 or REG1 to not equal REF
+       </doc>
+       <reg32 offset="0" name="0">
+               <bitfield name="REG0" low="0" high="17" type="hex"/>
+       </reg32>
+       <reg32 offset="1" name="1">
+               <bitfield name="REG1" low="0" high="17" type="hex"/>
+       </reg32>
+       <reg32 offset="2" name="2">
+               <bitfield name="REF" low="0" high="31" type="uint"/>
+       </reg32>
+</domain>
+
 <domain name="CP_DISPATCH_COMPUTE" width="32">
        <reg32 offset="0" name="0"/>
        <reg32 offset="1" name="1">
@@ -1201,7 +1437,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
                <bitfield name="REG" low="0" high="11"/>
                <!-- the bit to test -->
                <bitfield name="BIT" low="20" high="24" type="uint"/>
-               <bitfield name="UNK25" pos="25" type="boolean"/>
+               <!-- execute CP_WAIT_FOR_ME beforehand -->
+               <bitfield name="WAIT_FOR_ME" pos="25" type="boolean"/>
        </reg32>
 </domain>
 
@@ -1215,5 +1452,31 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
        </reg32>
 </domain>
 
+<domain name="CP_COND_EXEC" width="32">
+       <doc>
+                Executes the following DWORDs of commands if the dword at ADDR0
+                is not equal to 0 and the dword at ADDR1 is less than REF
+                (signed comparison).
+       </doc>
+       <reg32 offset="0" name="0">
+               <bitfield name="ADDR0_LO" low="0" high="31"/>
+       </reg32>
+       <reg32 offset="1" name="1">
+               <bitfield name="ADDR0_HI" low="0" high="31"/>
+       </reg32>
+       <reg32 offset="2" name="2">
+               <bitfield name="ADDR1_LO" low="0" high="31"/>
+       </reg32>
+       <reg32 offset="3" name="3">
+               <bitfield name="ADDR1_HI" low="0" high="31"/>
+       </reg32>
+       <reg32 offset="4" name="4">
+               <bitfield name="REF" low="0" high="31"/>
+       </reg32>
+       <reg32 offset="5" name="5">
+               <bitfield name="DWORDS" low="0" high="31" type="uint"/>
+       </reg32>
+</domain>
+
 </database>
 
index 49ea11acfaa18b09e04025f619ac8d83b9db65ba..caa1a54af14fafbbef7b9a844dc14a075455d378 100644 (file)
@@ -738,7 +738,7 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
       tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
       tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
                      A6XX_CP_REG_TEST_0_BIT(0) |
-                     A6XX_CP_REG_TEST_0_UNK25);
+                     A6XX_CP_REG_TEST_0_WAIT_FOR_ME);
 
       tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
       tu_cs_emit(cs, 0x10000000);
@@ -1124,7 +1124,7 @@ tu6_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 
    seqno = tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true);
 
-   tu_cs_emit_pkt7(cs, CP_UNK_A6XX_14, 4);
+   tu_cs_emit_pkt7(cs, CP_WAIT_MEM_GTE, 4);
    tu_cs_emit(cs, 0x00000000);
    tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
    tu_cs_emit(cs, seqno);
@@ -1217,7 +1217,7 @@ emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
    tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
          A6XX_CP_REG_TEST_0_BIT(0) |
-         A6XX_CP_REG_TEST_0_UNK25);
+         A6XX_CP_REG_TEST_0_WAIT_FOR_ME);
 
    tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
    tu_cs_emit(cs, 0x10000000);
@@ -1231,7 +1231,7 @@ emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
        */
       tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
       tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(OVERFLOW_FLAG_REG) |
-            CP_REG_TO_MEM_0_CNT(1 - 1));
+            CP_REG_TO_MEM_0_CNT(0));
       tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_OVERFLOW);
 
       tu_cs_emit_pkt4(cs, OVERFLOW_FLAG_REG, 1);
@@ -1401,7 +1401,7 @@ tu6_render_tile(struct tu_cmd_buffer *cmd,
       tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
       tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
                      A6XX_CP_REG_TEST_0_BIT(0) |
-                     A6XX_CP_REG_TEST_0_UNK25);
+                     A6XX_CP_REG_TEST_0_WAIT_FOR_ME);
 
       tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
       tu_cs_emit(cs, 0x10000000);
index bea63f4c28bb6c59244256eae441132f6a4bdd4a..1f1ce8e87711618aea1c85e09720bfafeeea2644 100644 (file)
@@ -166,7 +166,7 @@ time_elapsed_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
        OUT_PKT3(ring, CP_REG_TO_MEM, 2);
        OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
                        CP_REG_TO_MEM_0_64B |
-                       CP_REG_TO_MEM_0_CNT(2-1)); /* write 2 regs to mem */
+                       CP_REG_TO_MEM_0_CNT(2)); /* write 2 regs to mem */
        OUT_RELOCW(ring, scratch_bo, sample_off, 0, 0);
 
        /* ok... here we really *would* like to use the CP_SET_CONSTANT
@@ -188,7 +188,7 @@ time_elapsed_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
        OUT_PKT3(ring, CP_REG_TO_MEM, 2);
        OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
                        CP_REG_TO_MEM_0_ACCUMULATE |
-                       CP_REG_TO_MEM_0_CNT(1-1));       /* readback 1 regs */
+                       CP_REG_TO_MEM_0_CNT(0));       /* readback 1 regs */
        OUT_RELOCW(ring, scratch_bo, addr_off, 0, 0);
 
        /* now copy that back to CP_ME_NRT_ADDR: */
index 0f9b68b1c4bda2d87fc966e3dc41ba54df54900d..9e4cbf0a978b21b971ed7ffcf5d815098748323e 100644 (file)
@@ -754,7 +754,7 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3
                        OUT_PKT7(ring, CP_MEM_TO_REG, 3);
                        OUT_RING(ring, CP_MEM_TO_REG_0_REG(REG_A6XX_VPC_SO_BUFFER_OFFSET(i)) |
                                        CP_MEM_TO_REG_0_64B | CP_MEM_TO_REG_0_ACCUMULATE |
-                                       CP_MEM_TO_REG_0_CNT(1 - 1));
+                                       CP_MEM_TO_REG_0_CNT(0));
                        OUT_RELOC(ring, control_ptr(fd6_context(ctx), flush_base[i].offset));
                }
 
@@ -1457,7 +1457,7 @@ fd6_framebuffer_barrier(struct fd_context *ctx)
 
        fd6_event_write(batch, ring, 0x31, false);
 
-       OUT_PKT7(ring, CP_UNK_A6XX_14, 4);
+       OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
        OUT_RING(ring, 0x00000000);
        OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
        OUT_RING(ring, seqno);
index 7ca42adc37218afee61a29d1d20a316b2ffe5b8f..8e2134aec362e34f319972321ce0839f386b00a5 100644 (file)
@@ -186,7 +186,7 @@ fd6_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
 
        seqno = fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
 
-       OUT_PKT7(ring, CP_UNK_A6XX_14, 4);
+       OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
        OUT_RING(ring, 0x00000000);
        OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
        OUT_RING(ring, seqno);
index c12c06905d57660c765bc04ab378788b741977ad..a97f4742828663c01643867a446c0a867fe6903c 100644 (file)
@@ -442,7 +442,7 @@ emit_vsc_overflow_test(struct fd_batch *batch)
 
        OUT_PKT7(ring, CP_MEM_TO_REG, 3);
        OUT_RING(ring, CP_MEM_TO_REG_0_REG(OVERFLOW_FLAG_REG) |
-                       CP_MEM_TO_REG_0_CNT(1 - 1));
+                       CP_MEM_TO_REG_0_CNT(0));
        OUT_RELOC(ring, control_ptr(fd6_ctx, vsc_scratch));  /* SRC_LO/HI */
 
        /*
@@ -461,7 +461,7 @@ emit_vsc_overflow_test(struct fd_batch *batch)
        OUT_PKT7(ring, CP_REG_TEST, 1);
        OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
                        A6XX_CP_REG_TEST_0_BIT(0) |
-                       A6XX_CP_REG_TEST_0_UNK25);
+                       A6XX_CP_REG_TEST_0_WAIT_FOR_ME);
 
        OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
        OUT_RING(ring, 0x10000000);
@@ -568,7 +568,7 @@ emit_conditional_ib(struct fd_batch *batch, struct fd_tile *tile,
        OUT_PKT7(ring, CP_REG_TEST, 1);
        OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(tile->p)) |
                        A6XX_CP_REG_TEST_0_BIT(tile->n) |
-                       A6XX_CP_REG_TEST_0_UNK25);
+                       A6XX_CP_REG_TEST_0_WAIT_FOR_ME);
 
        OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
        OUT_RING(ring, 0x10000000);
@@ -856,7 +856,7 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
                OUT_PKT7(ring, CP_REG_TEST, 1);
                OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
                                A6XX_CP_REG_TEST_0_BIT(0) |
-                               A6XX_CP_REG_TEST_0_UNK25);
+                               A6XX_CP_REG_TEST_0_WAIT_FOR_ME);
 
                OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
                OUT_RING(ring, 0x10000000);
@@ -1332,7 +1332,7 @@ fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
                OUT_PKT7(ring, CP_REG_TEST, 1);
                OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
                                A6XX_CP_REG_TEST_0_BIT(0) |
-                               A6XX_CP_REG_TEST_0_UNK25);
+                               A6XX_CP_REG_TEST_0_WAIT_FOR_ME);
 
                OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
                OUT_RING(ring, 0x10000000);
index 29ec167f54352328db9a6ad1405e845476ff2990..f58fff7b4bac43759f4f0502c05828031e0ca0ee 100644 (file)
@@ -325,7 +325,7 @@ primitives_generated_resume(struct fd_acc_query *aq, struct fd_batch *batch)
 
        OUT_PKT7(ring, CP_REG_TO_MEM, 3);
        OUT_RING(ring, CP_REG_TO_MEM_0_64B |
-                       CP_REG_TO_MEM_0_CNT(counter_count - 1) |
+                       CP_REG_TO_MEM_0_CNT(counter_count) |
                        CP_REG_TO_MEM_0_REG(counter_base));
        primitives_relocw(ring, aq, prim_start);
 
@@ -342,7 +342,7 @@ primitives_generated_pause(struct fd_acc_query *aq, struct fd_batch *batch)
        /* snapshot the end values: */
        OUT_PKT7(ring, CP_REG_TO_MEM, 3);
        OUT_RING(ring, CP_REG_TO_MEM_0_64B |
-                       CP_REG_TO_MEM_0_CNT(counter_count - 1) |
+                       CP_REG_TO_MEM_0_CNT(counter_count) |
                        CP_REG_TO_MEM_0_REG(counter_base));
        primitives_relocw(ring, aq, prim_stop);