src/gallium/drivers/nouveau/nv50/nv50_query_hw.c

   1 /*
   2  * Copyright 2011 Christoph Bumiller
   3  * Copyright 2015 Samuel Pitoiset
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in
  13  * all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21  * OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #define NV50_PUSH_EXPLICIT_SPACE_CHECKING
  25
  26 #include "nv50/nv50_context.h"
  27 #include "nv50/nv50_query_hw.h"
  28 #include "nv_object.xml.h"
  29
  30 #define NV50_HW_QUERY_STATE_READY   0
  31 #define NV50_HW_QUERY_STATE_ACTIVE  1
  32 #define NV50_HW_QUERY_STATE_ENDED   2
  33 #define NV50_HW_QUERY_STATE_FLUSHED 3
  34
  35 /* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
  36  * (since we use only a single GPU channel per screen) will not work properly.
  37  *
  38  * The first is not that big of an issue because OpenGL does not allow nested
  39  * queries anyway.
  40  */
  41
  42 #define NV50_HW_QUERY_ALLOC_SPACE 256
  43
  44 static bool
  45 nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
  46                        int size)
  47 {
  48    struct nv50_screen *screen = nv50->screen;
  49    struct nv50_hw_query *hq = nv50_hw_query(q);
  50    int ret;
  51
  52    if (hq->bo) {
  53       nouveau_bo_ref(NULL, &hq->bo);
  54       if (hq->mm) {
  55          if (hq->state == NV50_HW_QUERY_STATE_READY)
  56             nouveau_mm_free(hq->mm);
  57          else
  58             nouveau_fence_work(screen->base.fence.current,
  59                                nouveau_mm_free_work, hq->mm);
  60       }
  61    }
  62    if (size) {
  63       hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size,
  64                                    &hq->bo, &hq->base_offset);
  65       if (!hq->bo)
  66          return false;
  67       hq->offset = hq->base_offset;
  68
  69       ret = nouveau_bo_map(hq->bo, 0, screen->base.client);
  70       if (ret) {
  71          nv50_hw_query_allocate(nv50, q, 0);
  72          return false;
  73       }
  74       hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);
  75    }
  76    return true;
  77 }
  78
  79 static void
  80 nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
  81                unsigned offset, uint32_t get)
  82 {
  83    struct nv50_hw_query *hq = nv50_hw_query(q);
  84
  85    offset += hq->offset;
  86
  87    PUSH_SPACE(push, 5);
  88    PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
  89    BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
  90    PUSH_DATAh(push, hq->bo->offset + offset);
  91    PUSH_DATA (push, hq->bo->offset + offset);
  92    PUSH_DATA (push, hq->sequence);
  93    PUSH_DATA (push, get);
  94 }
  95
  96 static inline void
  97 nv50_hw_query_update(struct nv50_query *q)
  98 {
  99    struct nv50_hw_query *hq = nv50_hw_query(q);
 100
 101    if (hq->is64bit) {
 102       if (nouveau_fence_signalled(hq->fence))
 103          hq->state = NV50_HW_QUERY_STATE_READY;
 104    } else {
 105       if (hq->data[0] == hq->sequence)
 106          hq->state = NV50_HW_QUERY_STATE_READY;
 107    }
 108 }
 109
 110 static void
 111 nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q)
 112 {
 113    struct nv50_hw_query *hq = nv50_hw_query(q);
 114    nv50_hw_query_allocate(nv50, q, 0);
 115    nouveau_fence_ref(NULL, &hq->fence);
 116    FREE(hq);
 117 }
 118
 119 static boolean
 120 nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
 121 {
 122    struct nouveau_pushbuf *push = nv50->base.pushbuf;
 123    struct nv50_hw_query *hq = nv50_hw_query(q);
 124
 125    /* For occlusion queries we have to change the storage, because a previous
 126     * query might set the initial render condition to false even *after* we re-
 127     * initialized it to true.
 128     */
 129    if (hq->rotate) {
 130       hq->offset += hq->rotate;
 131       hq->data += hq->rotate / sizeof(*hq->data);
 132       if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
 133          nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);
 134
 135       /* XXX: can we do this with the GPU, and sync with respect to a previous
 136        *  query ?
 137        */
 138       hq->data[0] = hq->sequence; /* initialize sequence */
 139       hq->data[1] = 1; /* initial render condition = true */
 140       hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
 141       hq->data[5] = 0;
 142    }
 143    if (!hq->is64bit)
 144       hq->data[0] = hq->sequence++; /* the previously used one */
 145
 146    switch (q->type) {
 147    case PIPE_QUERY_OCCLUSION_COUNTER:
 148       hq->nesting = nv50->screen->num_occlusion_queries_active++;
 149       if (hq->nesting) {
 150          nv50_hw_query_get(push, q, 0x10, 0x0100f002);
 151       } else {
 152          PUSH_SPACE(push, 4);
 153          BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
 154          PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
 155          BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
 156          PUSH_DATA (push, 1);
 157       }
 158       break;
 159    case PIPE_QUERY_PRIMITIVES_GENERATED:
 160       nv50_hw_query_get(push, q, 0x10, 0x06805002);
 161       break;
 162    case PIPE_QUERY_PRIMITIVES_EMITTED:
 163       nv50_hw_query_get(push, q, 0x10, 0x05805002);
 164       break;
 165    case PIPE_QUERY_SO_STATISTICS:
 166       nv50_hw_query_get(push, q, 0x20, 0x05805002);
 167       nv50_hw_query_get(push, q, 0x30, 0x06805002);
 168       break;
 169    case PIPE_QUERY_PIPELINE_STATISTICS:
 170       nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
 171       nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
 172       nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
 173       nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
 174       nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
 175       nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
 176       nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
 177       nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
 178       break;
 179    case PIPE_QUERY_TIME_ELAPSED:
 180       nv50_hw_query_get(push, q, 0x10, 0x00005002);
 181       break;
 182    default:
 183       assert(0);
 184       return false;
 185    }
 186    hq->state = NV50_HW_QUERY_STATE_ACTIVE;
 187    return true;
 188 }
 189
 190 static void
 191 nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
 192 {
 193    struct nouveau_pushbuf *push = nv50->base.pushbuf;
 194    struct nv50_hw_query *hq = nv50_hw_query(q);
 195
 196    hq->state = NV50_HW_QUERY_STATE_ENDED;
 197
 198    switch (q->type) {
 199    case PIPE_QUERY_OCCLUSION_COUNTER:
 200       nv50_hw_query_get(push, q, 0, 0x0100f002);
 201       if (--nv50->screen->num_occlusion_queries_active == 0) {
 202          PUSH_SPACE(push, 2);
 203          BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
 204          PUSH_DATA (push, 0);
 205       }
 206       break;
 207    case PIPE_QUERY_PRIMITIVES_GENERATED:
 208       nv50_hw_query_get(push, q, 0, 0x06805002);
 209       break;
 210    case PIPE_QUERY_PRIMITIVES_EMITTED:
 211       nv50_hw_query_get(push, q, 0, 0x05805002);
 212       break;
 213    case PIPE_QUERY_SO_STATISTICS:
 214       nv50_hw_query_get(push, q, 0x00, 0x05805002);
 215       nv50_hw_query_get(push, q, 0x10, 0x06805002);
 216       break;
 217    case PIPE_QUERY_PIPELINE_STATISTICS:
 218       nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
 219       nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
 220       nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
 221       nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
 222       nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
 223       nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
 224       nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
 225       nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
 226       break;
 227    case PIPE_QUERY_TIMESTAMP:
 228       hq->sequence++;
 229       /* fall through */
 230    case PIPE_QUERY_TIME_ELAPSED:
 231       nv50_hw_query_get(push, q, 0, 0x00005002);
 232       break;
 233    case PIPE_QUERY_GPU_FINISHED:
 234       hq->sequence++;
 235       nv50_hw_query_get(push, q, 0, 0x1000f010);
 236       break;
 237    case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
 238       hq->sequence++;
 239       nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
 240       break;
 241    case PIPE_QUERY_TIMESTAMP_DISJOINT:
 242       /* This query is not issued on GPU because disjoint is forced to false */
 243       hq->state = NV50_HW_QUERY_STATE_READY;
 244       break;
 245    default:
 246       assert(0);
 247       break;
 248    }
 249    if (hq->is64bit)
 250       nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
 251 }
 252
 253 static boolean
 254 nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
 255                          boolean wait, union pipe_query_result *result)
 256 {
 257    struct nv50_hw_query *hq = nv50_hw_query(q);
 258    uint64_t *res64 = (uint64_t *)result;
 259    uint32_t *res32 = (uint32_t *)result;
 260    uint8_t *res8 = (uint8_t *)result;
 261    uint64_t *data64 = (uint64_t *)hq->data;
 262    int i;
 263
 264    if (hq->state != NV50_HW_QUERY_STATE_READY)
 265       nv50_hw_query_update(q);
 266
 267    if (hq->state != NV50_HW_QUERY_STATE_READY) {
 268       if (!wait) {
 269          /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
 270          if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
 271             hq->state = NV50_HW_QUERY_STATE_FLUSHED;
 272             PUSH_KICK(nv50->base.pushbuf);
 273          }
 274          return false;
 275       }
 276       if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
 277          return false;
 278    }
 279    hq->state = NV50_HW_QUERY_STATE_READY;
 280
 281    switch (q->type) {
 282    case PIPE_QUERY_GPU_FINISHED:
 283       res8[0] = true;
 284       break;
 285    case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
 286       res64[0] = hq->data[1] - hq->data[5];
 287       break;
 288    case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
 289    case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
 290       res64[0] = data64[0] - data64[2];
 291       break;
 292    case PIPE_QUERY_SO_STATISTICS:
 293       res64[0] = data64[0] - data64[4];
 294       res64[1] = data64[2] - data64[6];
 295       break;
 296    case PIPE_QUERY_PIPELINE_STATISTICS:
 297       for (i = 0; i < 8; ++i)
 298          res64[i] = data64[i * 2] - data64[16 + i * 2];
 299       break;
 300    case PIPE_QUERY_TIMESTAMP:
 301       res64[0] = data64[1];
 302       break;
 303    case PIPE_QUERY_TIMESTAMP_DISJOINT:
 304       res64[0] = 1000000000;
 305       res8[8] = false;
 306       break;
 307    case PIPE_QUERY_TIME_ELAPSED:
 308       res64[0] = data64[1] - data64[3];
 309       break;
 310    case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
 311       res32[0] = hq->data[1];
 312       break;
 313    default:
 314       assert(0);
 315       return false;
 316    }
 317
 318    return true;
 319 }
 320
 321 static const struct nv50_query_funcs hw_query_funcs = {
 322    .destroy_query = nv50_hw_destroy_query,
 323    .begin_query = nv50_hw_begin_query,
 324    .end_query = nv50_hw_end_query,
 325    .get_query_result = nv50_hw_get_query_result,
 326 };
 327
 328 struct nv50_query *
 329 nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
 330 {
 331    struct nv50_hw_query *hq;
 332    struct nv50_query *q;
 333
 334    hq = CALLOC_STRUCT(nv50_hw_query);
 335    if (!hq)
 336       return NULL;
 337
 338    q = &hq->base;
 339    q->funcs = &hw_query_funcs;
 340    q->type = type;
 341
 342    switch (q->type) {
 343    case PIPE_QUERY_OCCLUSION_COUNTER:
 344       hq->rotate = 32;
 345       break;
 346    case PIPE_QUERY_PRIMITIVES_GENERATED:
 347    case PIPE_QUERY_PRIMITIVES_EMITTED:
 348    case PIPE_QUERY_SO_STATISTICS:
 349    case PIPE_QUERY_PIPELINE_STATISTICS:
 350       hq->is64bit = true;
 351       break;
 352    case PIPE_QUERY_TIME_ELAPSED:
 353    case PIPE_QUERY_TIMESTAMP:
 354    case PIPE_QUERY_TIMESTAMP_DISJOINT:
 355    case PIPE_QUERY_GPU_FINISHED:
 356    case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
 357       break;
 358    default:
 359       debug_printf("invalid query type: %u\n", type);
 360       FREE(q);
 361       return NULL;
 362    }
 363
 364    if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
 365       FREE(hq);
 366       return NULL;
 367    }
 368
 369    if (hq->rotate) {
 370       /* we advance before query_begin ! */
 371       hq->offset -= hq->rotate;
 372       hq->data -= hq->rotate / sizeof(*hq->data);
 373    }
 374
 375    return q;
 376 }
 377
 378 void
 379 nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
 380                              struct nv50_query *q, unsigned result_offset)
 381 {
 382    struct nv50_hw_query *hq = nv50_hw_query(q);
 383
 384    nv50_hw_query_update(q);
 385    if (hq->state != NV50_HW_QUERY_STATE_READY)
 386       nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client);
 387    hq->state = NV50_HW_QUERY_STATE_READY;
 388
 389    BEGIN_NV04(push, SUBC_3D(method), 1);
 390    PUSH_DATA (push, hq->data[result_offset / 4]);
 391 }
 392
 393 void
 394 nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)
 395 {
 396    struct nv50_hw_query *hq = nv50_hw_query(q);
 397    unsigned offset = hq->offset;
 398
 399    PUSH_SPACE(push, 5);
 400    PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
 401    BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
 402    PUSH_DATAh(push, hq->bo->offset + offset);
 403    PUSH_DATA (push, hq->bo->offset + offset);
 404    PUSH_DATA (push, hq->sequence);
 405    PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
 406 }