src/gallium/drivers/radeon/r600_pipe_common.c

   1 /*
   2  * Copyright 2013 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors: Marek Olšák <maraeo@gmail.com>
  24  *
  25  */
  26
  27 #include "r600_pipe_common.h"
  28 #include "r600_cs.h"
  29 #include "tgsi/tgsi_parse.h"
  30 #include "util/u_format_s3tc.h"
  31
  32 static const struct debug_named_value common_debug_options[] = {
  33         /* logging */
  34         { "tex", DBG_TEX, "Print texture info" },
  35         { "texmip", DBG_TEXMIP, "Print texture info (mipmapped only)" },
  36         { "compute", DBG_COMPUTE, "Print compute info" },
  37         { "vm", DBG_VM, "Print virtual addresses when creating resources" },
  38         { "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" },
  39
  40         /* shaders */
  41         { "fs", DBG_FS, "Print fetch shaders" },
  42         { "vs", DBG_VS, "Print vertex shaders" },
  43         { "gs", DBG_GS, "Print geometry shaders" },
  44         { "ps", DBG_PS, "Print pixel shaders" },
  45         { "cs", DBG_CS, "Print compute shaders" },
  46
  47         DEBUG_NAMED_VALUE_END /* must be last */
  48 };
  49
  50 static bool r600_interpret_tiling(struct r600_common_screen *rscreen,
  51                                   uint32_t tiling_config)
  52 {
  53         switch ((tiling_config & 0xe) >> 1) {
  54         case 0:
  55                 rscreen->tiling_info.num_channels = 1;
  56                 break;
  57         case 1:
  58                 rscreen->tiling_info.num_channels = 2;
  59                 break;
  60         case 2:
  61                 rscreen->tiling_info.num_channels = 4;
  62                 break;
  63         case 3:
  64                 rscreen->tiling_info.num_channels = 8;
  65                 break;
  66         default:
  67                 return false;
  68         }
  69
  70         switch ((tiling_config & 0x30) >> 4) {
  71         case 0:
  72                 rscreen->tiling_info.num_banks = 4;
  73                 break;
  74         case 1:
  75                 rscreen->tiling_info.num_banks = 8;
  76                 break;
  77         default:
  78                 return false;
  79
  80         }
  81         switch ((tiling_config & 0xc0) >> 6) {
  82         case 0:
  83                 rscreen->tiling_info.group_bytes = 256;
  84                 break;
  85         case 1:
  86                 rscreen->tiling_info.group_bytes = 512;
  87                 break;
  88         default:
  89                 return false;
  90         }
  91         return true;
  92 }
  93
  94 static bool evergreen_interpret_tiling(struct r600_common_screen *rscreen,
  95                                        uint32_t tiling_config)
  96 {
  97         switch (tiling_config & 0xf) {
  98         case 0:
  99                 rscreen->tiling_info.num_channels = 1;
 100                 break;
 101         case 1:
 102                 rscreen->tiling_info.num_channels = 2;
 103                 break;
 104         case 2:
 105                 rscreen->tiling_info.num_channels = 4;
 106                 break;
 107         case 3:
 108                 rscreen->tiling_info.num_channels = 8;
 109                 break;
 110         default:
 111                 return false;
 112         }
 113
 114         switch ((tiling_config & 0xf0) >> 4) {
 115         case 0:
 116                 rscreen->tiling_info.num_banks = 4;
 117                 break;
 118         case 1:
 119                 rscreen->tiling_info.num_banks = 8;
 120                 break;
 121         case 2:
 122                 rscreen->tiling_info.num_banks = 16;
 123                 break;
 124         default:
 125                 return false;
 126         }
 127
 128         switch ((tiling_config & 0xf00) >> 8) {
 129         case 0:
 130                 rscreen->tiling_info.group_bytes = 256;
 131                 break;
 132         case 1:
 133                 rscreen->tiling_info.group_bytes = 512;
 134                 break;
 135         default:
 136                 return false;
 137         }
 138         return true;
 139 }
 140
 141 static bool r600_init_tiling(struct r600_common_screen *rscreen)
 142 {
 143         uint32_t tiling_config = rscreen->info.r600_tiling_config;
 144
 145         /* set default group bytes, overridden by tiling info ioctl */
 146         if (rscreen->chip_class <= R700) {
 147                 rscreen->tiling_info.group_bytes = 256;
 148         } else {
 149                 rscreen->tiling_info.group_bytes = 512;
 150         }
 151
 152         if (!tiling_config)
 153                 return true;
 154
 155         if (rscreen->chip_class <= R700) {
 156                 return r600_interpret_tiling(rscreen, tiling_config);
 157         } else {
 158                 return evergreen_interpret_tiling(rscreen, tiling_config);
 159         }
 160 }
 161
 162 bool r600_common_screen_init(struct r600_common_screen *rscreen,
 163                              struct radeon_winsys *ws)
 164 {
 165         ws->query_info(ws, &rscreen->info);
 166
 167         rscreen->ws = ws;
 168         rscreen->family = rscreen->info.family;
 169         rscreen->chip_class = rscreen->info.chip_class;
 170         rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
 171
 172         if (!r600_init_tiling(rscreen)) {
 173                 return false;
 174         }
 175
 176         util_format_s3tc_init();
 177
 178         /* Create the auxiliary context. */
 179         pipe_mutex_init(rscreen->aux_context_lock);
 180         rscreen->aux_context = rscreen->b.context_create(&rscreen->b, NULL);
 181         return true;
 182 }
 183
 184 void r600_common_screen_cleanup(struct r600_common_screen *rscreen)
 185 {
 186         pipe_mutex_destroy(rscreen->aux_context_lock);
 187         rscreen->aux_context->destroy(rscreen->aux_context);
 188 }
 189
 190 bool r600_common_context_init(struct r600_common_context *rctx,
 191                               struct r600_common_screen *rscreen)
 192 {
 193         rctx->ws = rscreen->ws;
 194         rctx->family = rscreen->family;
 195         rctx->chip_class = rscreen->chip_class;
 196
 197         r600_streamout_init(rctx);
 198
 199         rctx->allocator_so_filled_size = u_suballocator_create(&rctx->b, 4096, 4,
 200                                                                0, PIPE_USAGE_STATIC, TRUE);
 201         if (!rctx->allocator_so_filled_size)
 202                 return false;
 203
 204         return true;
 205 }
 206
 207 void r600_common_context_cleanup(struct r600_common_context *rctx)
 208 {
 209         if (rctx->allocator_so_filled_size) {
 210                 u_suballocator_destroy(rctx->allocator_so_filled_size);
 211         }
 212 }
 213
 214 void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
 215 {
 216         struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 217         struct r600_resource *rr = (struct r600_resource *)r;
 218
 219         if (r == NULL) {
 220                 return;
 221         }
 222
 223         /*
 224          * The idea is to compute a gross estimate of memory requirement of
 225          * each draw call. After each draw call, memory will be precisely
 226          * accounted. So the uncertainty is only on the current draw call.
 227          * In practice this gave very good estimate (+/- 10% of the target
 228          * memory limit).
 229          */
 230         if (rr->domains & RADEON_DOMAIN_GTT) {
 231                 rctx->gtt += rr->buf->size;
 232         }
 233         if (rr->domains & RADEON_DOMAIN_VRAM) {
 234                 rctx->vram += rr->buf->size;
 235         }
 236 }
 237
 238 static unsigned tgsi_get_processor_type(const struct tgsi_token *tokens)
 239 {
 240         struct tgsi_parse_context parse;
 241
 242         if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
 243                 debug_printf("tgsi_parse_init() failed in %s:%i!\n", __func__, __LINE__);
 244                 return ~0;
 245         }
 246         return parse.FullHeader.Processor.Processor;
 247 }
 248
 249 bool r600_can_dump_shader(struct r600_common_screen *rscreen,
 250                           const struct tgsi_token *tokens)
 251 {
 252         switch (tgsi_get_processor_type(tokens)) {
 253         case TGSI_PROCESSOR_VERTEX:
 254                 return (rscreen->debug_flags & DBG_VS) != 0;
 255         case TGSI_PROCESSOR_GEOMETRY:
 256                 return (rscreen->debug_flags & DBG_GS) != 0;
 257         case TGSI_PROCESSOR_FRAGMENT:
 258                 return (rscreen->debug_flags & DBG_PS) != 0;
 259         case TGSI_PROCESSOR_COMPUTE:
 260                 return (rscreen->debug_flags & DBG_CS) != 0;
 261         default:
 262                 return false;
 263         }
 264 }
 265
 266 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
 267                               unsigned offset, unsigned size, unsigned value)
 268 {
 269         struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
 270
 271         pipe_mutex_lock(rscreen->aux_context_lock);
 272         rctx->clear_buffer(&rctx->b, dst, offset, size, value);
 273         rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
 274         pipe_mutex_unlock(rscreen->aux_context_lock);
 275 }
 276
 277 boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
 278                                         struct radeon_winsys_cs_handle *buf,
 279                                         enum radeon_bo_usage usage)
 280 {
 281         if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) {
 282                 return TRUE;
 283         }
 284         if (ctx->rings.dma.cs &&
 285             ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage)) {
 286                 return TRUE;
 287         }
 288         return FALSE;
 289 }
 290
 291 void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
 292                                       struct r600_resource *resource,
 293                                       unsigned usage)
 294 {
 295         enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
 296
 297         if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
 298                 return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
 299         }
 300
 301         if (!(usage & PIPE_TRANSFER_WRITE)) {
 302                 /* have to wait for the last write */
 303                 rusage = RADEON_USAGE_WRITE;
 304         }
 305
 306         if (ctx->rings.gfx.cs->cdw &&
 307             ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs,
 308                                              resource->cs_buf, rusage)) {
 309                 if (usage & PIPE_TRANSFER_DONTBLOCK) {
 310                         ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
 311                         return NULL;
 312                 } else {
 313                         ctx->rings.gfx.flush(ctx, 0);
 314                 }
 315         }
 316         if (ctx->rings.dma.cs &&
 317             ctx->rings.dma.cs->cdw &&
 318             ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs,
 319                                              resource->cs_buf, rusage)) {
 320                 if (usage & PIPE_TRANSFER_DONTBLOCK) {
 321                         ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
 322                         return NULL;
 323                 } else {
 324                         ctx->rings.dma.flush(ctx, 0);
 325                 }
 326         }
 327
 328         if (ctx->ws->buffer_is_busy(resource->buf, rusage)) {
 329                 if (usage & PIPE_TRANSFER_DONTBLOCK) {
 330                         return NULL;
 331                 } else {
 332                         /* We will be wait for the GPU. Wait for any offloaded
 333                          * CS flush to complete to avoid busy-waiting in the winsys. */
 334                         ctx->ws->cs_sync_flush(ctx->rings.gfx.cs);
 335                         if (ctx->rings.dma.cs)
 336                                 ctx->ws->cs_sync_flush(ctx->rings.dma.cs);
 337                 }
 338         }
 339
 340         return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
 341 }
 342
 343 bool r600_init_resource(struct r600_common_screen *rscreen,
 344                         struct r600_resource *res,
 345                         unsigned size, unsigned alignment,
 346                         bool use_reusable_pool, unsigned usage)
 347 {
 348         uint32_t initial_domain, domains;
 349
 350         switch(usage) {
 351         case PIPE_USAGE_STAGING:
 352                 /* Staging resources participate in transfers, i.e. are used
 353                  * for uploads and downloads from regular resources.
 354                  * We generate them internally for some transfers.
 355                  */
 356                 initial_domain = RADEON_DOMAIN_GTT;
 357                 domains = RADEON_DOMAIN_GTT;
 358                 break;
 359         case PIPE_USAGE_DYNAMIC:
 360         case PIPE_USAGE_STREAM:
 361                 /* Default to GTT, but allow the memory manager to move it to VRAM. */
 362                 initial_domain = RADEON_DOMAIN_GTT;
 363                 domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
 364                 break;
 365         case PIPE_USAGE_DEFAULT:
 366         case PIPE_USAGE_STATIC:
 367         case PIPE_USAGE_IMMUTABLE:
 368         default:
 369                 /* Don't list GTT here, because the memory manager would put some
 370                  * resources to GTT no matter what the initial domain is.
 371                  * Not listing GTT in the domains improves performance a lot. */
 372                 initial_domain = RADEON_DOMAIN_VRAM;
 373                 domains = RADEON_DOMAIN_VRAM;
 374                 break;
 375         }
 376
 377         res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
 378                                               use_reusable_pool,
 379                                               initial_domain);
 380         if (!res->buf) {
 381                 return false;
 382         }
 383
 384         res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf);
 385         res->domains = domains;
 386         util_range_set_empty(&res->valid_buffer_range);
 387
 388         if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
 389                 fprintf(stderr, "VM start=0x%llX  end=0x%llX | Buffer %u bytes\n",
 390                         r600_resource_va(&rscreen->b, &res->b.b),
 391                         r600_resource_va(&rscreen->b, &res->b.b) + res->buf->size,
 392                         res->buf->size);
 393         }
 394         return true;
 395 }