lima/ppir: enable vectorize optimization
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query_hw.c
index 4e34216caf03ab70854e7cc1777a5e643eff9402..672b3e10eedec86c7c1fa997034475521ebb5077 100644 (file)
@@ -122,7 +122,23 @@ nvc0_hw_destroy_query(struct nvc0_context *nvc0, struct nvc0_query *q)
    FREE(hq);
 }
 
-static boolean
+static void
+nvc0_hw_query_write_compute_invocations(struct nvc0_context *nvc0,
+                                        struct nvc0_hw_query *hq,
+                                        uint32_t offset) /* added to hq->bo->offset + hq->offset below */
+{ /* Pushes a MACRO_COMPUTE_COUNTER_TO_QUERY call carrying nvc0->compute_invocations and hq->bo->offset + hq->offset + offset. */
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+   nouveau_pushbuf_space(push, 16, 0, 8);
+   PUSH_REFN(push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+   BEGIN_1IC0(push, NVC0_3D(MACRO_COMPUTE_COUNTER_TO_QUERY), 4); /* 4 = the four data words pushed below */
+   PUSH_DATA (push, nvc0->compute_invocations); /* presumably the low 32 bits -- confirm */
+   PUSH_DATAh(push, nvc0->compute_invocations); /* presumably the high 32 bits -- confirm */
+   PUSH_DATAh(push, hq->bo->offset + hq->offset + offset); /* note: DATAh comes first here, the reverse of the counter words above */
+   PUSH_DATA (push, hq->bo->offset + hq->offset + offset); /* same expression as the previous word, pushed via PUSH_DATA */
+}
+
+static bool
 nvc0_hw_begin_query(struct nvc0_context *nvc0, struct nvc0_query *q)
 {
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -198,6 +214,7 @@ nvc0_hw_begin_query(struct nvc0_context *nvc0, struct nvc0_query *q)
       nvc0_hw_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
       nvc0_hw_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
       nvc0_hw_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
+      nvc0_hw_query_write_compute_invocations(nvc0, hq, 0xc0 + 0xa0);
       break;
    default:
       break;
@@ -270,6 +287,7 @@ nvc0_hw_end_query(struct nvc0_context *nvc0, struct nvc0_query *q)
       nvc0_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
       nvc0_hw_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
       nvc0_hw_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
+      nvc0_hw_query_write_compute_invocations(nvc0, hq, 0xa0);
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
       /* This query is not issued on GPU because disjoint is forced to false */
@@ -286,9 +304,9 @@ nvc0_hw_end_query(struct nvc0_context *nvc0, struct nvc0_query *q)
       nouveau_fence_ref(nvc0->screen->base.fence.current, &hq->fence);
 }
 
-static boolean
+static bool
 nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
-                         boolean wait, union pipe_query_result *result)
+                         bool wait, union pipe_query_result *result)
 {
    struct nvc0_hw_query *hq = nvc0_hw_query(q);
    uint64_t *res64 = (uint64_t*)result;
@@ -352,9 +370,8 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
       res64[0] = data64[1] - data64[3];
       break;
    case PIPE_QUERY_PIPELINE_STATISTICS:
-      for (i = 0; i < 10; ++i)
+      for (i = 0; i < 11; ++i)
          res64[i] = data64[i * 2] - data64[24 + i * 2];
-      result->pipeline_statistics.cs_invocations = 0;
       break;
    case NVC0_HW_QUERY_TFB_BUFFER_OFFSET:
       res32[0] = hq->data[1];
@@ -370,7 +387,7 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
 static void
 nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
                                   struct nvc0_query *q,
-                                  boolean wait,
+                                  bool wait,
                                   enum pipe_query_value_type result_type,
                                   int index,
                                   struct pipe_resource *resource,