radeonsi: remove more functions from r600_pipe_common.c
[mesa.git] / src / gallium / drivers / radeon / r600_pipe_common.c
/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "r600_pipe_common.h"
#include "r600_cs.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "radeon/radeon_video.h"

/*
 * pipe_context
 */

/**
 * Write an EOP event.
 *
 * \param event		EVENT_TYPE_*
 * \param event_flags	Optional cache flush flags (TC)
 * \param data_sel	1 = fence, 3 = timestamp
 * \param buf		Buffer
 * \param va		GPU address
 * \param new_fence	Fence value to write for this event
 * \param query_type	PIPE_QUERY_*; used to decide whether the GFX9
 *			ZPASS_DONE bug workaround is needed
 */
void si_gfx_write_event_eop(struct r600_common_context *ctx,
			    unsigned event, unsigned event_flags,
			    unsigned data_sel,
			    struct r600_resource *buf, uint64_t va,
			    uint32_t new_fence, unsigned query_type)
{
	struct radeon_winsys_cs *cs = ctx->gfx.cs;
	unsigned op = EVENT_TYPE(event) |
		      EVENT_INDEX(5) |
		      event_flags;
	unsigned sel = EOP_DATA_SEL(data_sel);

	/* Wait for write confirmation before writing data, but don't send
	 * an interrupt. */
	if (data_sel != EOP_DATA_SEL_DISCARD)
		sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);

	if (ctx->chip_class >= GFX9) {
		/* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
		 * counters) must immediately precede every timestamp event to
		 * prevent a GPU hang on GFX9.
		 *
		 * Occlusion queries don't need to do it here, because they
		 * always do ZPASS_DONE before the timestamp.
		 */
		if (ctx->chip_class == GFX9 &&
		    query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
		    query_type != PIPE_QUERY_OCCLUSION_PREDICATE &&
		    query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
			struct r600_resource *scratch = ctx->eop_bug_scratch;

			assert(16 * ctx->screen->info.num_render_backends <=
			       scratch->b.b.width0);
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
			radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
			radeon_emit(cs, scratch->gpu_address);
			radeon_emit(cs, scratch->gpu_address >> 32);

			radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
						  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
		}

		radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
		radeon_emit(cs, op);
		radeon_emit(cs, sel);
		radeon_emit(cs, va); /* address lo */
		radeon_emit(cs, va >> 32); /* address hi */
		radeon_emit(cs, new_fence); /* immediate data lo */
		radeon_emit(cs, 0); /* immediate data hi */
		radeon_emit(cs, 0); /* unused */
	} else {
		if (ctx->chip_class == CIK ||
		    ctx->chip_class == VI) {
			struct r600_resource *scratch = ctx->eop_bug_scratch;
			uint64_t va = scratch->gpu_address;

			/* Two EOP events are required to make all engines go idle
			 * (and optional cache flushes executed) before the timestamp
			 * is written.
			 */
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
			radeon_emit(cs, op);
			radeon_emit(cs, va);
			radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
			radeon_emit(cs, 0); /* immediate data */
			radeon_emit(cs, 0); /* unused */

			radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
						  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
		}

		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
		radeon_emit(cs, op);
		radeon_emit(cs, va);
		radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
		radeon_emit(cs, new_fence); /* immediate data */
		radeon_emit(cs, 0); /* unused */
	}

	if (buf) {
		radeon_add_to_buffer_list(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
					  RADEON_PRIO_QUERY);
	}
}

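/**
 * Return the number of CS dwords that callers should reserve for a fence
 * write emitted via si_gfx_write_event_eop.
 */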
unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen)
{
	unsigned dwords = 6;

	if (screen->chip_class == CIK ||
	    screen->chip_class == VI)
		dwords *= 2;

	if (!screen->info.has_virtual_memory)
		dwords += 2;

	return dwords;
}

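/**
 * Emit a WAIT_REG_MEM packet that blocks until the 32-bit value at \p va,
 * ANDed with \p mask, equals \p ref.
 */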
void si_gfx_wait_fence(struct r600_common_context *ctx,
		       uint64_t va, uint32_t ref, uint32_t mask)
{
	struct radeon_winsys_cs *cs = ctx->gfx.cs;

	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
	radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
	radeon_emit(cs, va);
	radeon_emit(cs, va >> 32);
	radeon_emit(cs, ref); /* reference value */
	radeon_emit(cs, mask); /* mask */
	radeon_emit(cs, 4); /* poll interval */
}

static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
{
	struct radeon_winsys_cs *cs = rctx->dma.cs;

	/* NOP waits for idle; only the packet encoding differs between
	 * SI and CIK+. */
	if (rctx->chip_class >= CIK)
		radeon_emit(cs, 0x00000000); /* NOP */
	else
		radeon_emit(cs, 0xf0000000); /* NOP */
}

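/**
 * Make room in the DMA IB for a transfer of \p num_dw dwords touching
 * \p dst and/or \p src: flush the GFX IB if it references either buffer,
 * flush the DMA IB if it is out of space or over the memory limit, emit a
 * wait-for-idle to avoid read-after-write hazards, and add both buffers to
 * the DMA buffer list.
 */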
void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
		       struct r600_resource *dst, struct r600_resource *src)
{
	uint64_t vram = ctx->dma.cs->used_vram;
	uint64_t gtt = ctx->dma.cs->used_gart;

	if (dst) {
		vram += dst->vram_usage;
		gtt += dst->gart_usage;
	}
	if (src) {
		vram += src->vram_usage;
		gtt += src->gart_usage;
	}

	/* Flush the GFX IB if DMA depends on it. */
	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
	    ((dst &&
	      ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf,
					       RADEON_USAGE_READWRITE)) ||
	     (src &&
	      ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf,
					       RADEON_USAGE_WRITE))))
		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);

	/* Flush if there's not enough space, or if the memory usage per IB
	 * is too large.
	 *
	 * IBs using too little memory are limited by the IB submission overhead.
	 * IBs using too much memory are limited by the kernel/TTM overhead.
	 * Too long IBs create CPU-GPU pipeline bubbles and add latency.
	 *
	 * This heuristic makes sure that DMA requests are executed
	 * very soon after the call is made and lowers memory usage.
	 * It improves texture upload performance by keeping the DMA
	 * engine busy while uploads are being submitted.
	 */
	num_dw++; /* for emit_wait_idle below */
	if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
	    ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
	    !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
		ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
		assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
	}

	/* Wait for idle if either buffer has been used in the IB before to
	 * prevent read-after-write hazards.
	 */
	if ((dst &&
	     ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf,
					      RADEON_USAGE_READWRITE)) ||
	    (src &&
	     ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf,
					      RADEON_USAGE_WRITE)))
		r600_dma_emit_wait_idle(ctx);

	/* If GPUVM is not supported, the CS checker needs 2 entries
	 * in the buffer list per packet, which has to be done manually.
	 */
	if (ctx->screen->info.has_virtual_memory) {
		if (dst)
			radeon_add_to_buffer_list(ctx, &ctx->dma, dst,
						  RADEON_USAGE_WRITE,
						  RADEON_PRIO_SDMA_BUFFER);
		if (src)
			radeon_add_to_buffer_list(ctx, &ctx->dma, src,
						  RADEON_USAGE_READ,
						  RADEON_PRIO_SDMA_BUFFER);
	}

	/* This function is called before every DMA call, so count it here. */
	ctx->num_dma_calls++;
}

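/**
 * Flush callback for the DMA/SDMA IB. When R600_DEBUG=check_vm is enabled,
 * the IB is saved before submission and checked for VM faults afterwards.
 */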
static void r600_flush_dma_ring(void *ctx, unsigned flags,
				struct pipe_fence_handle **fence)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct radeon_winsys_cs *cs = rctx->dma.cs;
	struct radeon_saved_cs saved;
	bool check_vm =
		(rctx->screen->debug_flags & DBG(CHECK_VM)) &&
		rctx->check_vm_faults;

	if (!radeon_emitted(cs, 0)) {
		if (fence)
			rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
		return;
	}

	if (check_vm)
		si_save_cs(rctx->ws, cs, &saved, true);

	rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
	if (fence)
		rctx->ws->fence_reference(fence, rctx->last_sdma_fence);

	if (check_vm) {
		/* Use a conservative timeout of 800 ms, after which we
		 * won't wait any longer and assume the GPU is hung.
		 */
		rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000);

		rctx->check_vm_faults(rctx, &saved, RING_DMA);
		si_clear_saved_cs(&saved);
	}
}

/**
 * Store a linearized copy of all chunks of \p cs together with the buffer
 * list in \p saved.
 */
void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
		struct radeon_saved_cs *saved, bool get_buffer_list)
{
	uint32_t *buf;
	unsigned i;

	/* Save the IB chunks. */
	saved->num_dw = cs->prev_dw + cs->current.cdw;
	saved->ib = MALLOC(4 * saved->num_dw);
	if (!saved->ib)
		goto oom;

	buf = saved->ib;
	for (i = 0; i < cs->num_prev; ++i) {
		memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
		buf += cs->prev[i].cdw;
	}
	memcpy(buf, cs->current.buf, cs->current.cdw * 4);

	if (!get_buffer_list)
		return;

	/* Save the buffer list. */
	saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
	saved->bo_list = CALLOC(saved->bo_count,
				sizeof(saved->bo_list[0]));
	if (!saved->bo_list) {
		FREE(saved->ib);
		goto oom;
	}
	ws->cs_get_buffer_list(cs, saved->bo_list);

	return;

oom:
	fprintf(stderr, "%s: out of memory\n", __func__);
	memset(saved, 0, sizeof(*saved));
}

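/**
 * Free the IB copy and buffer list allocated by si_save_cs.
 */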
void si_clear_saved_cs(struct radeon_saved_cs *saved)
{
	FREE(saved->ib);
	FREE(saved->bo_list);

	memset(saved, 0, sizeof(*saved));
}

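/**
 * Implementation of pipe_context::get_device_reset_status: compare the
 * kernel's GPU reset counter against the value sampled at context creation.
 */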
static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	unsigned latest = rctx->ws->query_value(rctx->ws,
						RADEON_GPU_RESET_COUNTER);

	if (rctx->gpu_reset_counter == latest)
		return PIPE_NO_RESET;

	rctx->gpu_reset_counter = latest;
	return PIPE_UNKNOWN_CONTEXT_RESET;
}

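/**
 * Implementation of pipe_context::set_device_reset_callback: store or clear
 * the state tracker's reset callback.
 */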
static void r600_set_device_reset_callback(struct pipe_context *ctx,
					   const struct pipe_device_reset_callback *cb)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;

	if (cb)
		rctx->device_reset_callback = *cb;
	else
		memset(&rctx->device_reset_callback, 0,
		       sizeof(rctx->device_reset_callback));
}

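/**
 * Check for a device reset and, if one happened, notify the state tracker
 * through its reset callback. Returns true if a reset was reported.
 */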
bool si_check_device_reset(struct r600_common_context *rctx)
{
	enum pipe_reset_status status;

	if (!rctx->device_reset_callback.reset)
		return false;

	if (!rctx->b.get_device_reset_status)
		return false;

	status = rctx->b.get_device_reset_status(&rctx->b);
	if (status == PIPE_NO_RESET)
		return false;

	rctx->device_reset_callback.reset(rctx->device_reset_callback.data, status);
	return true;
}

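/**
 * Implementation of pipe_context::resource_commit: flush and sync any IB
 * that references the buffer, then commit or de-commit the requested range.
 */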
static bool r600_resource_commit(struct pipe_context *pctx,
				 struct pipe_resource *resource,
				 unsigned level, struct pipe_box *box,
				 bool commit)
{
	struct r600_common_context *ctx = (struct r600_common_context *)pctx;
	struct r600_resource *res = r600_resource(resource);

	/*
	 * Since buffer commitment changes cannot be pipelined, we need to
	 * (a) flush any pending commands that refer to the buffer we're about
	 *     to change, and
	 * (b) wait for threaded submit to finish, including those that were
	 *     triggered by some other, earlier operation.
	 */
	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
	    ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
					     res->buf, RADEON_USAGE_READWRITE)) {
		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
	}
	if (radeon_emitted(ctx->dma.cs, 0) &&
	    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
					     res->buf, RADEON_USAGE_READWRITE)) {
		ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
	}

	ctx->ws->cs_sync_flush(ctx->dma.cs);
	ctx->ws->cs_sync_flush(ctx->gfx.cs);

	assert(resource->target == PIPE_BUFFER);

	return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
}

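/**
 * Initialize the context state shared by all radeonsi contexts: transfer
 * pools, default pipe_context hooks, uploaders, the zeroed-memory
 * suballocator, the winsys context and, if available, the SDMA IB.
 */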
bool si_common_context_init(struct r600_common_context *rctx,
			    struct r600_common_screen *rscreen,
			    unsigned context_flags)
{
	slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
	slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers);

	rctx->screen = rscreen;
	rctx->ws = rscreen->ws;
	rctx->family = rscreen->family;
	rctx->chip_class = rscreen->chip_class;

	rctx->b.invalidate_resource = si_invalidate_resource;
	rctx->b.resource_commit = r600_resource_commit;
	rctx->b.transfer_map = u_transfer_map_vtbl;
	rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
	rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
	rctx->b.texture_subdata = u_default_texture_subdata;
	rctx->b.buffer_subdata = si_buffer_subdata;

	if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
		rctx->b.get_device_reset_status = r600_get_reset_status;
		rctx->gpu_reset_counter =
			rctx->ws->query_value(rctx->ws,
					      RADEON_GPU_RESET_COUNTER);
	}

	rctx->b.set_device_reset_callback = r600_set_device_reset_callback;

	si_init_context_texture_functions(rctx);
	si_init_query_functions(rctx);

	if (rctx->chip_class == CIK ||
	    rctx->chip_class == VI ||
	    rctx->chip_class == GFX9) {
		rctx->eop_bug_scratch = (struct r600_resource*)
			pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
					   16 * rscreen->info.num_render_backends);
		if (!rctx->eop_bug_scratch)
			return false;
	}

	rctx->allocator_zeroed_memory =
		u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
				      0, PIPE_USAGE_DEFAULT, 0, true);
	if (!rctx->allocator_zeroed_memory)
		return false;

	rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
						  0, PIPE_USAGE_STREAM);
	if (!rctx->b.stream_uploader)
		return false;

	rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
						 0, PIPE_USAGE_DEFAULT);
	if (!rctx->b.const_uploader)
		return false;

	rctx->ctx = rctx->ws->ctx_create(rctx->ws);
	if (!rctx->ctx)
		return false;

	if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
		rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
						   r600_flush_dma_ring,
						   rctx);
		rctx->dma.flush = r600_flush_dma_ring;
	}

	return true;
}

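/**
 * Release everything created by si_common_context_init, plus the DCC
 * statistics queries and the query result shader.
 */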
void si_common_context_cleanup(struct r600_common_context *rctx)
{
	unsigned i, j;

	/* Release DCC stats. */
	for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
		assert(!rctx->dcc_stats[i].query_active);

		for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats); j++)
			if (rctx->dcc_stats[i].ps_stats[j])
				rctx->b.destroy_query(&rctx->b,
						      rctx->dcc_stats[i].ps_stats[j]);

		r600_texture_reference(&rctx->dcc_stats[i].tex, NULL);
	}

	if (rctx->query_result_shader)
		rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader);

	if (rctx->gfx.cs)
		rctx->ws->cs_destroy(rctx->gfx.cs);
	if (rctx->dma.cs)
		rctx->ws->cs_destroy(rctx->dma.cs);
	if (rctx->ctx)
		rctx->ws->ctx_destroy(rctx->ctx);

	if (rctx->b.stream_uploader)
		u_upload_destroy(rctx->b.stream_uploader);
	if (rctx->b.const_uploader)
		u_upload_destroy(rctx->b.const_uploader);

	slab_destroy_child(&rctx->pool_transfers);
	slab_destroy_child(&rctx->pool_transfers_unsync);

	if (rctx->allocator_zeroed_memory) {
		u_suballocator_destroy(rctx->allocator_zeroed_memory);
	}
	rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL);
	rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
	r600_resource_reference(&rctx->eop_bug_scratch, NULL);
}

/*
 * pipe_screen
 */

static const struct debug_named_value common_debug_options[] = {
	/* logging */
	{ "tex", DBG(TEX), "Print texture info" },
	{ "nir", DBG(NIR), "Enable experimental NIR shaders" },
	{ "compute", DBG(COMPUTE), "Print compute info" },
	{ "vm", DBG(VM), "Print virtual addresses when creating resources" },
	{ "info", DBG(INFO), "Print driver information" },

	/* shaders */
	{ "vs", DBG(VS), "Print vertex shaders" },
	{ "gs", DBG(GS), "Print geometry shaders" },
	{ "ps", DBG(PS), "Print pixel shaders" },
	{ "cs", DBG(CS), "Print compute shaders" },
	{ "tcs", DBG(TCS), "Print tessellation control shaders" },
	{ "tes", DBG(TES), "Print tessellation evaluation shaders" },
	{ "noir", DBG(NO_IR), "Don't print the LLVM IR" },
	{ "notgsi", DBG(NO_TGSI), "Don't print the TGSI" },
	{ "noasm", DBG(NO_ASM), "Don't print disassembled shaders" },
	{ "preoptir", DBG(PREOPT_IR), "Print the LLVM IR before initial optimizations" },
	{ "checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR" },
	{ "nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants." },

	{ "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
	{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
547 { "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
548 { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
549
550 /* features */
551 { "nodma", DBG(NO_ASYNC_DMA), "Disable asynchronous DMA" },
552 { "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" },
553 { "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" },
554 { "notiling", DBG(NO_TILING), "Disable tiling" },
555 { "switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet." },
556 { "forcedma", DBG(FORCE_DMA), "Use asynchronous DMA for all operations when possible." },
557 { "precompile", DBG(PRECOMPILE), "Compile one shader variant at shader creation." },
558 { "nowc", DBG(NO_WC), "Disable GTT write combining" },
559 { "check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info." },
560 { "nodcc", DBG(NO_DCC), "Disable DCC." },
561 { "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." },
562 { "norbplus", DBG(NO_RB_PLUS), "Disable RB+." },
563 { "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." },
564 { "mono", DBG(MONOLITHIC_SHADERS), "Use old-style monolithic shaders compiled on demand" },
565 { "unsafemath", DBG(UNSAFE_MATH), "Enable unsafe math shader optimizations" },
566 { "nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main framebuffer" },
567 { "nodccmsaa", DBG(NO_DCC_MSAA), "Disable DCC for MSAA" },
568 { "dccmsaa", DBG(DCC_MSAA), "Enable DCC for MSAA" },
569 { "nodpbb", DBG(NO_DPBB), "Disable DPBB." },
570 { "nodfsm", DBG(NO_DFSM), "Disable DFSM." },
571 { "dpbb", DBG(DPBB), "Enable DPBB." },
572 { "dfsm", DBG(DFSM), "Enable DFSM." },
573 { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" },
574 { "reserve_vmid", DBG(RESERVE_VMID), "Force VMID reservation per context." },
575
576 DEBUG_NAMED_VALUE_END /* must be last */
577 };
578
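/**
 * Common resource_create: dispatch buffers to si_buffer_create and
 * everything else to si_texture_create.
 */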
struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
						const struct pipe_resource *templ)
{
	if (templ->target == PIPE_BUFFER) {
		return si_buffer_create(screen, templ, 256);
	} else {
		return si_texture_create(screen, templ);
	}
}

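/**
 * Initialize the screen state shared by all radeonsi screens: vtable hooks,
 * debug flags, the transfer slab and locks; optionally dump GPU information
 * when R600_DEBUG=info is set.
 */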
bool si_common_screen_init(struct r600_common_screen *rscreen,
			   struct radeon_winsys *ws)
{
	rscreen->b.resource_destroy = u_resource_destroy_vtbl;
	rscreen->b.resource_from_user_memory = si_buffer_from_user_memory;

	if (rscreen->info.has_hw_decode) {
		rscreen->b.is_video_format_supported = si_vid_is_format_supported;
	} else {
		rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
	}

	si_init_screen_texture_functions(rscreen);
	si_init_screen_query_functions(rscreen);

	rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
	rscreen->has_rbplus = false;
	rscreen->rbplus_allowed = false;

	slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);

	rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
	if (rscreen->force_aniso >= 0) {
		printf("radeon: Forcing anisotropy filter to %ix\n",
		       /* round down to a power of two */
		       1 << util_logbase2(rscreen->force_aniso));
	}

	(void) mtx_init(&rscreen->aux_context_lock, mtx_plain);
	(void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain);

	if (rscreen->debug_flags & DBG(INFO)) {
		printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
		       rscreen->info.pci_domain, rscreen->info.pci_bus,
		       rscreen->info.pci_dev, rscreen->info.pci_func);
		printf("pci_id = 0x%x\n", rscreen->info.pci_id);
		printf("family = %i\n", rscreen->info.family);
		printf("chip_class = %i\n", rscreen->info.chip_class);
		printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size);
		printf("gart_page_size = %u\n", rscreen->info.gart_page_size);
		printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
		printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
		printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
		printf("max_alloc_size = %i MB\n",
		       (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
		printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
		printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram);
		printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
		printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
		printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode);
		printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings);
		printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings);
		printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version);
		printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version);
		printf("me_fw_version = %i\n", rscreen->info.me_fw_version);
		printf("me_fw_feature = %i\n", rscreen->info.me_fw_feature);
		printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version);
		printf("pfp_fw_feature = %i\n", rscreen->info.pfp_fw_feature);
		printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version);
		printf("ce_fw_feature = %i\n", rscreen->info.ce_fw_feature);
		printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
		printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
		printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size);
		printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
		       rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
		printf("has_userptr = %i\n", rscreen->info.has_userptr);
		printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
		printf("has_sync_file = %u\n", rscreen->info.has_sync_file);

		printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
		printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
		printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
		printf("max_se = %i\n", rscreen->info.max_se);
		printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);

		printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
		printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
		printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
		printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
		printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
		printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
		printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
		printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
	}
	return true;
}

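/**
 * Destroy everything owned by the common screen, including the aux context,
 * the disk shader cache and the winsys.
 */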
void si_destroy_common_screen(struct r600_common_screen *rscreen)
{
	si_perfcounters_destroy(rscreen);
	si_gpu_load_kill_thread(rscreen);

	mtx_destroy(&rscreen->gpu_load_mutex);
	mtx_destroy(&rscreen->aux_context_lock);
	rscreen->aux_context->destroy(rscreen->aux_context);

	slab_destroy_parent(&rscreen->pool_transfers);

	disk_cache_destroy(rscreen->disk_shader_cache);
	rscreen->ws->destroy(rscreen->ws);
	FREE(rscreen);
}

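/**
 * Return whether shaders for the given processor (PIPE_SHADER_*) should be
 * dumped; each stage's debug flag sits at bit \p processor of debug_flags.
 */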
bool si_can_dump_shader(struct r600_common_screen *rscreen,
			unsigned processor)
{
	return rscreen->debug_flags & (1 << processor);
}

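/**
 * Return whether extra IR sanity checks should run for this shader stage.
 */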
bool si_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
{
	return (rscreen->debug_flags & DBG(CHECK_IR)) ||
	       si_can_dump_shader(rscreen, processor);
}

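/**
 * Clear a buffer range to \p value using the aux context's DMA clear,
 * serialized by aux_context_lock.
 */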
void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
			    uint64_t offset, uint64_t size, unsigned value)
{
	struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;

	mtx_lock(&rscreen->aux_context_lock);
	rctx->dma_clear_buffer(&rctx->b, dst, offset, size, value);
	rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
	mtx_unlock(&rscreen->aux_context_lock);
}