src/gallium/drivers/nouveau/nv50/nv50_query_hw.c

   1 /*
   2  * Copyright 2011 Christoph Bumiller
   3  * Copyright 2015 Samuel Pitoiset
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in
  13  * all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21  * OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #define NV50_PUSH_EXPLICIT_SPACE_CHECKING
  25
  26 #include "nv50/nv50_context.h"
  27 #include "nv50/nv50_query_hw.h"
  28 #include "nv50/nv50_query_hw_metric.h"
  29 #include "nv50/nv50_query_hw_sm.h"
  30 #include "nv_object.xml.h"
  31
  32 /* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
  33  * (since we use only a single GPU channel per screen) will not work properly.
  34  *
  35  * The first is not that big of an issue because OpenGL does not allow nested
  36  * queries anyway.
  37  */
  38
  39 #define NV50_HW_QUERY_ALLOC_SPACE 256
  40
  41 bool
  42 nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
  43                        int size)
  44 {
  45    struct nv50_screen *screen = nv50->screen;
  46    struct nv50_hw_query *hq = nv50_hw_query(q);
  47    int ret;
  48
  49    if (hq->bo) {
  50       nouveau_bo_ref(NULL, &hq->bo);
  51       if (hq->mm) {
  52          if (hq->state == NV50_HW_QUERY_STATE_READY)
  53             nouveau_mm_free(hq->mm);
  54          else
  55             nouveau_fence_work(screen->base.fence.current,
  56                                nouveau_mm_free_work, hq->mm);
  57       }
  58    }
  59    if (size) {
  60       hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size,
  61                                    &hq->bo, &hq->base_offset);
  62       if (!hq->bo)
  63          return false;
  64       hq->offset = hq->base_offset;
  65
  66       ret = nouveau_bo_map(hq->bo, 0, screen->base.client);
  67       if (ret) {
  68          nv50_hw_query_allocate(nv50, q, 0);
  69          return false;
  70       }
  71       hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);
  72    }
  73    return true;
  74 }
  75
  76 static void
  77 nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
  78                unsigned offset, uint32_t get)
  79 {
  80    struct nv50_hw_query *hq = nv50_hw_query(q);
  81
  82    offset += hq->offset;
  83
  84    PUSH_SPACE(push, 5);
  85    PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
  86    BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
  87    PUSH_DATAh(push, hq->bo->offset + offset);
  88    PUSH_DATA (push, hq->bo->offset + offset);
  89    PUSH_DATA (push, hq->sequence);
  90    PUSH_DATA (push, get);
  91 }
  92
  93 static inline void
  94 nv50_hw_query_update(struct nv50_query *q)
  95 {
  96    struct nv50_hw_query *hq = nv50_hw_query(q);
  97
  98    if (hq->is64bit) {
  99       if (nouveau_fence_signalled(hq->fence))
 100          hq->state = NV50_HW_QUERY_STATE_READY;
 101    } else {
 102       if (hq->data[0] == hq->sequence)
 103          hq->state = NV50_HW_QUERY_STATE_READY;
 104    }
 105 }
 106
 107 static void
 108 nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q)
 109 {
 110    struct nv50_hw_query *hq = nv50_hw_query(q);
 111
 112    if (hq->funcs && hq->funcs->destroy_query) {
 113       hq->funcs->destroy_query(nv50, hq);
 114       return;
 115    }
 116
 117    nv50_hw_query_allocate(nv50, q, 0);
 118    nouveau_fence_ref(NULL, &hq->fence);
 119    FREE(hq);
 120 }
 121
 122 static boolean
 123 nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
 124 {
 125    struct nouveau_pushbuf *push = nv50->base.pushbuf;
 126    struct nv50_hw_query *hq = nv50_hw_query(q);
 127
 128    if (hq->funcs && hq->funcs->begin_query)
 129       return hq->funcs->begin_query(nv50, hq);
 130
 131    /* For occlusion queries we have to change the storage, because a previous
 132     * query might set the initial render condition to false even *after* we re-
 133     * initialized it to true.
 134     */
 135    if (hq->rotate) {
 136       hq->offset += hq->rotate;
 137       hq->data += hq->rotate / sizeof(*hq->data);
 138       if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
 139          nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);
 140
 141       /* XXX: can we do this with the GPU, and sync with respect to a previous
 142        *  query ?
 143        */
 144       hq->data[0] = hq->sequence; /* initialize sequence */
 145       hq->data[1] = 1; /* initial render condition = true */
 146       hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
 147       hq->data[5] = 0;
 148    }
 149    if (!hq->is64bit)
 150       hq->data[0] = hq->sequence++; /* the previously used one */
 151
 152    switch (q->type) {
 153    case PIPE_QUERY_OCCLUSION_COUNTER:
 154    case PIPE_QUERY_OCCLUSION_PREDICATE:
 155    case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
 156       if (nv50->screen->num_occlusion_queries_active++) {
 157          nv50_hw_query_get(push, q, 0x10, 0x0100f002);
 158       } else {
 159          PUSH_SPACE(push, 4);
 160          BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
 161          PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
 162          BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
 163          PUSH_DATA (push, 1);
 164       }
 165       break;
 166    case PIPE_QUERY_PRIMITIVES_GENERATED:
 167       nv50_hw_query_get(push, q, 0x10, 0x06805002);
 168       break;
 169    case PIPE_QUERY_PRIMITIVES_EMITTED:
 170       nv50_hw_query_get(push, q, 0x10, 0x05805002);
 171       break;
 172    case PIPE_QUERY_SO_STATISTICS:
 173       nv50_hw_query_get(push, q, 0x20, 0x05805002);
 174       nv50_hw_query_get(push, q, 0x30, 0x06805002);
 175       break;
 176    case PIPE_QUERY_PIPELINE_STATISTICS:
 177       nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
 178       nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
 179       nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
 180       nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
 181       nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
 182       nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
 183       nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
 184       nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
 185       break;
 186    case PIPE_QUERY_TIME_ELAPSED:
 187       nv50_hw_query_get(push, q, 0x10, 0x00005002);
 188       break;
 189    default:
 190       assert(0);
 191       return false;
 192    }
 193    hq->state = NV50_HW_QUERY_STATE_ACTIVE;
 194    return true;
 195 }
 196
 197 static void
 198 nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
 199 {
 200    struct nouveau_pushbuf *push = nv50->base.pushbuf;
 201    struct nv50_hw_query *hq = nv50_hw_query(q);
 202
 203    if (hq->funcs && hq->funcs->end_query) {
 204       hq->funcs->end_query(nv50, hq);
 205       return;
 206    }
 207
 208    hq->state = NV50_HW_QUERY_STATE_ENDED;
 209
 210    switch (q->type) {
 211    case PIPE_QUERY_OCCLUSION_COUNTER:
 212    case PIPE_QUERY_OCCLUSION_PREDICATE:
 213    case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
 214       nv50_hw_query_get(push, q, 0, 0x0100f002);
 215       if (--nv50->screen->num_occlusion_queries_active == 0) {
 216          PUSH_SPACE(push, 2);
 217          BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
 218          PUSH_DATA (push, 0);
 219       }
 220       break;
 221    case PIPE_QUERY_PRIMITIVES_GENERATED:
 222       nv50_hw_query_get(push, q, 0, 0x06805002);
 223       break;
 224    case PIPE_QUERY_PRIMITIVES_EMITTED:
 225       nv50_hw_query_get(push, q, 0, 0x05805002);
 226       break;
 227    case PIPE_QUERY_SO_STATISTICS:
 228       nv50_hw_query_get(push, q, 0x00, 0x05805002);
 229       nv50_hw_query_get(push, q, 0x10, 0x06805002);
 230       break;
 231    case PIPE_QUERY_PIPELINE_STATISTICS:
 232       nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
 233       nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
 234       nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
 235       nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
 236       nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
 237       nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
 238       nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
 239       nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
 240       break;
 241    case PIPE_QUERY_TIMESTAMP:
 242       hq->sequence++;
 243       /* fall through */
 244    case PIPE_QUERY_TIME_ELAPSED:
 245       nv50_hw_query_get(push, q, 0, 0x00005002);
 246       break;
 247    case PIPE_QUERY_GPU_FINISHED:
 248       hq->sequence++;
 249       nv50_hw_query_get(push, q, 0, 0x1000f010);
 250       break;
 251    case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
 252       hq->sequence++;
 253       nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
 254       break;
 255    case PIPE_QUERY_TIMESTAMP_DISJOINT:
 256       /* This query is not issued on GPU because disjoint is forced to false */
 257       hq->state = NV50_HW_QUERY_STATE_READY;
 258       break;
 259    default:
 260       assert(0);
 261       break;
 262    }
 263    if (hq->is64bit)
 264       nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
 265 }
 266
 267 static boolean
 268 nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
 269                          boolean wait, union pipe_query_result *result)
 270 {
 271    struct nv50_hw_query *hq = nv50_hw_query(q);
 272    uint64_t *res64 = (uint64_t *)result;
 273    uint32_t *res32 = (uint32_t *)result;
 274    uint8_t *res8 = (uint8_t *)result;
 275    uint64_t *data64 = (uint64_t *)hq->data;
 276    int i;
 277
 278    if (hq->funcs && hq->funcs->get_query_result)
 279       return hq->funcs->get_query_result(nv50, hq, wait, result);
 280
 281    if (hq->state != NV50_HW_QUERY_STATE_READY)
 282       nv50_hw_query_update(q);
 283
 284    if (hq->state != NV50_HW_QUERY_STATE_READY) {
 285       if (!wait) {
 286          /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
 287          if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
 288             hq->state = NV50_HW_QUERY_STATE_FLUSHED;
 289             PUSH_KICK(nv50->base.pushbuf);
 290          }
 291          return false;
 292       }
 293       if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
 294          return false;
 295    }
 296    hq->state = NV50_HW_QUERY_STATE_READY;
 297
 298    switch (q->type) {
 299    case PIPE_QUERY_GPU_FINISHED:
 300       res8[0] = true;
 301       break;
 302    case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
 303       res64[0] = hq->data[1] - hq->data[5];
 304       break;
 305    case PIPE_QUERY_OCCLUSION_PREDICATE:
 306    case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
 307       res8[0] = hq->data[1] != hq->data[5];
 308       break;
 309    case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
 310    case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
 311       res64[0] = data64[0] - data64[2];
 312       break;
 313    case PIPE_QUERY_SO_STATISTICS:
 314       res64[0] = data64[0] - data64[4];
 315       res64[1] = data64[2] - data64[6];
 316       break;
 317    case PIPE_QUERY_PIPELINE_STATISTICS:
 318       for (i = 0; i < 8; ++i)
 319          res64[i] = data64[i * 2] - data64[16 + i * 2];
 320       break;
 321    case PIPE_QUERY_TIMESTAMP:
 322       res64[0] = data64[1];
 323       break;
 324    case PIPE_QUERY_TIMESTAMP_DISJOINT:
 325       res64[0] = 1000000000;
 326       res8[8] = false;
 327       break;
 328    case PIPE_QUERY_TIME_ELAPSED:
 329       res64[0] = data64[1] - data64[3];
 330       break;
 331    case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
 332       res32[0] = hq->data[1];
 333       break;
 334    default:
 335       assert(0);
 336       return false;
 337    }
 338
 339    return true;
 340 }
 341
 342 static const struct nv50_query_funcs hw_query_funcs = {
 343    .destroy_query = nv50_hw_destroy_query,
 344    .begin_query = nv50_hw_begin_query,
 345    .end_query = nv50_hw_end_query,
 346    .get_query_result = nv50_hw_get_query_result,
 347 };
 348
 349 struct nv50_query *
 350 nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
 351 {
 352    struct nv50_hw_query *hq;
 353    struct nv50_query *q;
 354
 355    hq = nv50_hw_sm_create_query(nv50, type);
 356    if (hq) {
 357       hq->base.funcs = &hw_query_funcs;
 358       return (struct nv50_query *)hq;
 359    }
 360
 361    hq = nv50_hw_metric_create_query(nv50, type);
 362    if (hq) {
 363       hq->base.funcs = &hw_query_funcs;
 364       return (struct nv50_query *)hq;
 365    }
 366
 367    hq = CALLOC_STRUCT(nv50_hw_query);
 368    if (!hq)
 369       return NULL;
 370
 371    q = &hq->base;
 372    q->funcs = &hw_query_funcs;
 373    q->type = type;
 374
 375    switch (q->type) {
 376    case PIPE_QUERY_OCCLUSION_COUNTER:
 377    case PIPE_QUERY_OCCLUSION_PREDICATE:
 378    case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
 379       hq->rotate = 32;
 380       break;
 381    case PIPE_QUERY_PRIMITIVES_GENERATED:
 382    case PIPE_QUERY_PRIMITIVES_EMITTED:
 383    case PIPE_QUERY_SO_STATISTICS:
 384    case PIPE_QUERY_PIPELINE_STATISTICS:
 385       hq->is64bit = true;
 386       break;
 387    case PIPE_QUERY_TIME_ELAPSED:
 388    case PIPE_QUERY_TIMESTAMP:
 389    case PIPE_QUERY_TIMESTAMP_DISJOINT:
 390    case PIPE_QUERY_GPU_FINISHED:
 391    case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
 392       break;
 393    default:
 394       debug_printf("invalid query type: %u\n", type);
 395       FREE(q);
 396       return NULL;
 397    }
 398
 399    if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
 400       FREE(hq);
 401       return NULL;
 402    }
 403
 404    if (hq->rotate) {
 405       /* we advance before query_begin ! */
 406       hq->offset -= hq->rotate;
 407       hq->data -= hq->rotate / sizeof(*hq->data);
 408    }
 409
 410    return q;
 411 }
 412
 413 int
 414 nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id,
 415                               struct pipe_driver_query_info *info)
 416 {
 417    int num_hw_sm_queries = 0, num_hw_metric_queries = 0;
 418
 419    num_hw_sm_queries = nv50_hw_sm_get_driver_query_info(screen, 0, NULL);
 420    num_hw_metric_queries =
 421       nv50_hw_metric_get_driver_query_info(screen, 0, NULL);
 422
 423    if (!info)
 424       return num_hw_sm_queries + num_hw_metric_queries;
 425
 426    if (id < num_hw_sm_queries)
 427       return nv50_hw_sm_get_driver_query_info(screen, id, info);
 428
 429    return nv50_hw_metric_get_driver_query_info(screen,
 430                                                id - num_hw_sm_queries, info);
 431 }
 432
 433 void
 434 nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
 435                              struct nv50_query *q, unsigned result_offset)
 436 {
 437    struct nv50_hw_query *hq = nv50_hw_query(q);
 438
 439    nv50_hw_query_update(q);
 440    if (hq->state != NV50_HW_QUERY_STATE_READY)
 441       nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client);
 442    hq->state = NV50_HW_QUERY_STATE_READY;
 443
 444    BEGIN_NV04(push, SUBC_3D(method), 1);
 445    PUSH_DATA (push, hq->data[result_offset / 4]);
 446 }
 447
 448 void
 449 nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)
 450 {
 451    struct nv50_hw_query *hq = nv50_hw_query(q);
 452    unsigned offset = hq->offset;
 453
 454    PUSH_SPACE(push, 5);
 455    PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
 456    BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
 457    PUSH_DATAh(push, hq->bo->offset + offset);
 458    PUSH_DATA (push, hq->bo->offset + offset);
 459    PUSH_DATA (push, hq->sequence);
 460    PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
 461 }