nvc0: fix valid range for shader buffers
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query_hw_sm.c
index 54e0140ba62c3f8643adb355c97fd770a0213ec3..d4721201bbaa84d59d8463e54ac04a3a45f1a676 100644 (file)
@@ -372,9 +372,9 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
     * mov b32 $r6 $pm6
     * mov b32 $r7 $pm7
     * set $p0 0x1 eq u32 $r8 0x0
-    * mov b32 $r10 c0[0x0]
+    * mov b32 $r10 c7[0x620]
     * ext u32 $r8 $r12 0x414
-    * mov b32 $r11 c0[0x4]
+    * mov b32 $r11 c7[0x624]
     * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
     * ext u32 $r9 $r12 0x208
     * (not $p0) exit
@@ -392,7 +392,7 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
     * add b32 $r12 $c $r12 $r9
     * st b128 wt g[$r10d] $r0q
     * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
-    * mov b32 $r0 c0[0x8]
+    * mov b32 $r0 c7[0x628]
     * add b32 $r13 $r13 0x0 $c
     * $p1 st b128 wt g[$r12d+0x40] $r4q
     * st b32 wt g[$r12d+0x50] $r0
@@ -410,9 +410,9 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
    0x2c00000028019c04ULL,
    0x2c0000002c01dc04ULL,
    0x190e0000fc81dc03ULL,
-   0x2800400000029de4ULL,
+   0x28005c1880029de4ULL,
    0x7000c01050c21c03ULL,
-   0x280040001002dde4ULL,
+   0x28005c189002dde4ULL,
    0x204282020042e047ULL,
    0x7000c00820c25c03ULL,
    0x80000000000021e7ULL,
@@ -430,7 +430,7 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
    0x4801000024c31c03ULL,
    0x9400000000a01fc5ULL,
    0x200002e04202c047ULL,
-   0x2800400020001de4ULL,
+   0x28005c18a0001de4ULL,
    0x0800000000d35c42ULL,
    0x9400000100c107c5ULL,
    0x9400000140c01f85ULL,
@@ -453,9 +453,9 @@ static const uint64_t nvf0_read_hw_sm_counters_code[] =
    0x86400000051c001aULL,
    0x86400000059c001eULL,
    0xdb201c007f9c201eULL,
-   0x64c03c00001c002aULL,
+   0x64c03ce0c41c002aULL,
    0xc00000020a1c3021ULL,
-   0x64c03c00009c002eULL,
+   0x64c03ce0c49c002eULL,
    0x0810a0808010b810ULL,
    0xc0000001041c3025ULL,
    0x180000000020003cULL,
@@ -473,7 +473,7 @@ static const uint64_t nvf0_read_hw_sm_counters_code[] =
    0xe0840000049c3032ULL,
    0xfe800000001c2800ULL,
    0x080000b81080b010ULL,
-   0x64c03c00011c0002ULL,
+   0x64c03ce0c51c0002ULL,
    0xe08040007f9c3436ULL,
    0xfe80000020043010ULL,
    0xfc800000281c3000ULL,
@@ -1105,14 +1105,14 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
     * mov b32 $r6 $pm6
     * mov b32 $r7 $pm7
     * set $p0 0x1 eq u32 $r8 0x0
-    * mov b32 $r10 c0[0x0]
-    * mov b32 $r11 c0[0x4]
+    * mov b32 $r10 c15[0x620]
+    * mov b32 $r11 c15[0x624]
     * ext u32 $r8 $r9 0x414
     * (not $p0) exit
     * mul $r8 u32 $r8 u32 48
     * add b32 $r10 $c $r10 $r8
     * add b32 $r11 $r11 0x0 $c
-    * mov b32 $r8 c0[0x8]
+    * mov b32 $r8 c15[0x628]
     * st b128 wt g[$r10d+0x00] $r0q
     * st b128 wt g[$r10d+0x10] $r4q
     * st b32 wt g[$r10d+0x20] $r8
@@ -1128,14 +1128,14 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
    0x2c00000028019c04ULL,
    0x2c0000002c01dc04ULL,
    0x190e0000fc81dc03ULL,
-   0x2800400000029de4ULL,
-   0x280040001002dde4ULL,
+   0x28007c1880029de4ULL,
+   0x28007c189002dde4ULL,
    0x7000c01050921c03ULL,
    0x80000000000021e7ULL,
    0x10000000c0821c02ULL,
    0x4801000020a29c03ULL,
    0x0800000000b2dc42ULL,
-   0x2800400020021de4ULL,
+   0x28007c18a0021de4ULL,
    0x9400000000a01fc5ULL,
    0x9400000040a11fc5ULL,
    0x9400000080a21f85ULL,
@@ -1813,6 +1813,40 @@ nvc0_hw_sm_get_program(struct nvc0_screen *screen)
    return prog;
 }
 
+static inline void
+nvc0_hw_sm_upload_input(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
+   uint64_t address;
+   const int s = 5;
+
+   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
+
+   PUSH_SPACE(push, 11);
+
+   if (screen->base.class_3d >= NVE4_3D_CLASS) {
+      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
+      PUSH_DATAh(push, address + NVC0_CB_AUX_MP_INFO);
+      PUSH_DATA (push, address + NVC0_CB_AUX_MP_INFO);
+      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
+      PUSH_DATA (push, 3 * 4);
+      PUSH_DATA (push, 0x1);
+      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 3);
+      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+   } else {
+      BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
+      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+      PUSH_DATAh(push, address);
+      PUSH_DATA (push, address);
+      BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 3);
+      PUSH_DATA (push, NVC0_CB_AUX_MP_INFO);
+   }
+   PUSH_DATA (push, (hq->bo->offset + hq->base_offset));
+   PUSH_DATAh(push, (hq->bo->offset + hq->base_offset));
+   PUSH_DATA (push, hq->sequence);
+}
+
 static void
 nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
 {
@@ -1857,11 +1891,10 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
    PUSH_SPACE(push, 1);
    IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0);
 
-   pipe->bind_compute_state(pipe, screen->pm.prog);
-   input[0] = (hq->bo->offset + hq->base_offset);
-   input[1] = (hq->bo->offset + hq->base_offset) >> 32;
-   input[2] = hq->sequence;
+   /* upload input data for the compute shader which reads MP counters */
+   nvc0_hw_sm_upload_input(nvc0, hq);
 
+   pipe->bind_compute_state(pipe, screen->pm.prog);
    for (i = 0; i < 3; i++) {
       info.block[i] = block[i];
       info.grid[i] = grid[i];