src/gallium/drivers/radeon/r600_pipe_common.c

   1 /*
   2  * Copyright 2013 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors: Marek Olšák <maraeo@gmail.com>
  24  *
  25  */
  26
  27 #include "r600_pipe_common.h"
  28 #include "r600_cs.h"
  29 #include "tgsi/tgsi_parse.h"
  30 #include "util/u_format_s3tc.h"
  31
  32 static const struct debug_named_value common_debug_options[] = {
  33         /* logging */
  34         { "tex", DBG_TEX, "Print texture info" },
  35         { "texmip", DBG_TEXMIP, "Print texture info (mipmapped only)" },
  36         { "compute", DBG_COMPUTE, "Print compute info" },
  37         { "vm", DBG_VM, "Print virtual addresses when creating resources" },
  38         { "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" },
  39
  40         /* shaders */
  41         { "fs", DBG_FS, "Print fetch shaders" },
  42         { "vs", DBG_VS, "Print vertex shaders" },
  43         { "gs", DBG_GS, "Print geometry shaders" },
  44         { "ps", DBG_PS, "Print pixel shaders" },
  45         { "cs", DBG_CS, "Print compute shaders" },
  46
  47         DEBUG_NAMED_VALUE_END /* must be last */
  48 };
  49
  50 static bool r600_interpret_tiling(struct r600_common_screen *rscreen,
  51                                   uint32_t tiling_config)
  52 {
  53         switch ((tiling_config & 0xe) >> 1) {
  54         case 0:
  55                 rscreen->tiling_info.num_channels = 1;
  56                 break;
  57         case 1:
  58                 rscreen->tiling_info.num_channels = 2;
  59                 break;
  60         case 2:
  61                 rscreen->tiling_info.num_channels = 4;
  62                 break;
  63         case 3:
  64                 rscreen->tiling_info.num_channels = 8;
  65                 break;
  66         default:
  67                 return false;
  68         }
  69
  70         switch ((tiling_config & 0x30) >> 4) {
  71         case 0:
  72                 rscreen->tiling_info.num_banks = 4;
  73                 break;
  74         case 1:
  75                 rscreen->tiling_info.num_banks = 8;
  76                 break;
  77         default:
  78                 return false;
  79
  80         }
  81         switch ((tiling_config & 0xc0) >> 6) {
  82         case 0:
  83                 rscreen->tiling_info.group_bytes = 256;
  84                 break;
  85         case 1:
  86                 rscreen->tiling_info.group_bytes = 512;
  87                 break;
  88         default:
  89                 return false;
  90         }
  91         return true;
  92 }
  93
  94 static bool evergreen_interpret_tiling(struct r600_common_screen *rscreen,
  95                                        uint32_t tiling_config)
  96 {
  97         switch (tiling_config & 0xf) {
  98         case 0:
  99                 rscreen->tiling_info.num_channels = 1;
 100                 break;
 101         case 1:
 102                 rscreen->tiling_info.num_channels = 2;
 103                 break;
 104         case 2:
 105                 rscreen->tiling_info.num_channels = 4;
 106                 break;
 107         case 3:
 108                 rscreen->tiling_info.num_channels = 8;
 109                 break;
 110         default:
 111                 return false;
 112         }
 113
 114         switch ((tiling_config & 0xf0) >> 4) {
 115         case 0:
 116                 rscreen->tiling_info.num_banks = 4;
 117                 break;
 118         case 1:
 119                 rscreen->tiling_info.num_banks = 8;
 120                 break;
 121         case 2:
 122                 rscreen->tiling_info.num_banks = 16;
 123                 break;
 124         default:
 125                 return false;
 126         }
 127
 128         switch ((tiling_config & 0xf00) >> 8) {
 129         case 0:
 130                 rscreen->tiling_info.group_bytes = 256;
 131                 break;
 132         case 1:
 133                 rscreen->tiling_info.group_bytes = 512;
 134                 break;
 135         default:
 136                 return false;
 137         }
 138         return true;
 139 }
 140
 141 static bool r600_init_tiling(struct r600_common_screen *rscreen)
 142 {
 143         uint32_t tiling_config = rscreen->info.r600_tiling_config;
 144
 145         /* set default group bytes, overridden by tiling info ioctl */
 146         if (rscreen->chip_class <= R700) {
 147                 rscreen->tiling_info.group_bytes = 256;
 148         } else {
 149                 rscreen->tiling_info.group_bytes = 512;
 150         }
 151
 152         if (!tiling_config)
 153                 return true;
 154
 155         if (rscreen->chip_class <= R700) {
 156                 return r600_interpret_tiling(rscreen, tiling_config);
 157         } else {
 158                 return evergreen_interpret_tiling(rscreen, tiling_config);
 159         }
 160 }
 161
 162 bool r600_common_screen_init(struct r600_common_screen *rscreen,
 163                              struct radeon_winsys *ws)
 164 {
 165         ws->query_info(ws, &rscreen->info);
 166
 167         rscreen->ws = ws;
 168         rscreen->family = rscreen->info.family;
 169         rscreen->chip_class = rscreen->info.chip_class;
 170         rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
 171
 172         if (!r600_init_tiling(rscreen)) {
 173                 return false;
 174         }
 175
 176         util_format_s3tc_init();
 177
 178         pipe_mutex_init(rscreen->aux_context_lock);
 179         return true;
 180 }
 181
 182 void r600_common_screen_cleanup(struct r600_common_screen *rscreen)
 183 {
 184         pipe_mutex_destroy(rscreen->aux_context_lock);
 185         rscreen->aux_context->destroy(rscreen->aux_context);
 186 }
 187
 188 bool r600_common_context_init(struct r600_common_context *rctx,
 189                               struct r600_common_screen *rscreen)
 190 {
 191         rctx->ws = rscreen->ws;
 192         rctx->family = rscreen->family;
 193         rctx->chip_class = rscreen->chip_class;
 194
 195         r600_streamout_init(rctx);
 196
 197         rctx->allocator_so_filled_size = u_suballocator_create(&rctx->b, 4096, 4,
 198                                                                0, PIPE_USAGE_STATIC, TRUE);
 199         if (!rctx->allocator_so_filled_size)
 200                 return false;
 201
 202         return true;
 203 }
 204
 205 void r600_common_context_cleanup(struct r600_common_context *rctx)
 206 {
 207         if (rctx->allocator_so_filled_size) {
 208                 u_suballocator_destroy(rctx->allocator_so_filled_size);
 209         }
 210 }
 211
 212 void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
 213 {
 214         struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 215         struct r600_resource *rr = (struct r600_resource *)r;
 216
 217         if (r == NULL) {
 218                 return;
 219         }
 220
 221         /*
 222          * The idea is to compute a gross estimate of memory requirement of
 223          * each draw call. After each draw call, memory will be precisely
 224          * accounted. So the uncertainty is only on the current draw call.
 225          * In practice this gave very good estimate (+/- 10% of the target
 226          * memory limit).
 227          */
 228         if (rr->domains & RADEON_DOMAIN_GTT) {
 229                 rctx->gtt += rr->buf->size;
 230         }
 231         if (rr->domains & RADEON_DOMAIN_VRAM) {
 232                 rctx->vram += rr->buf->size;
 233         }
 234 }
 235
 236 static unsigned tgsi_get_processor_type(const struct tgsi_token *tokens)
 237 {
 238         struct tgsi_parse_context parse;
 239
 240         if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
 241                 debug_printf("tgsi_parse_init() failed in %s:%i!\n", __func__, __LINE__);
 242                 return ~0;
 243         }
 244         return parse.FullHeader.Processor.Processor;
 245 }
 246
 247 bool r600_can_dump_shader(struct r600_common_screen *rscreen,
 248                           const struct tgsi_token *tokens)
 249 {
 250         switch (tgsi_get_processor_type(tokens)) {
 251         case TGSI_PROCESSOR_VERTEX:
 252                 return (rscreen->debug_flags & DBG_VS) != 0;
 253         case TGSI_PROCESSOR_GEOMETRY:
 254                 return (rscreen->debug_flags & DBG_GS) != 0;
 255         case TGSI_PROCESSOR_FRAGMENT:
 256                 return (rscreen->debug_flags & DBG_PS) != 0;
 257         case TGSI_PROCESSOR_COMPUTE:
 258                 return (rscreen->debug_flags & DBG_CS) != 0;
 259         default:
 260                 return false;
 261         }
 262 }
 263
 264 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
 265                               unsigned offset, unsigned size, unsigned value)
 266 {
 267         struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
 268
 269         pipe_mutex_lock(rscreen->aux_context_lock);
 270         rctx->clear_buffer(&rctx->b, dst, offset, size, value);
 271         rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
 272         pipe_mutex_unlock(rscreen->aux_context_lock);
 273 }
 274
 275 boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
 276                                         struct radeon_winsys_cs_handle *buf,
 277                                         enum radeon_bo_usage usage)
 278 {
 279         if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) {
 280                 return TRUE;
 281         }
 282         if (ctx->rings.dma.cs &&
 283             ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage)) {
 284                 return TRUE;
 285         }
 286         return FALSE;
 287 }
 288
 289 void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
 290                                       struct r600_resource *resource,
 291                                       unsigned usage)
 292 {
 293         enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
 294
 295         if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
 296                 return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
 297         }
 298
 299         if (!(usage & PIPE_TRANSFER_WRITE)) {
 300                 /* have to wait for the last write */
 301                 rusage = RADEON_USAGE_WRITE;
 302         }
 303
 304         if (ctx->rings.gfx.cs->cdw &&
 305             ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs,
 306                                              resource->cs_buf, rusage)) {
 307                 if (usage & PIPE_TRANSFER_DONTBLOCK) {
 308                         ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
 309                         return NULL;
 310                 } else {
 311                         ctx->rings.gfx.flush(ctx, 0);
 312                 }
 313         }
 314         if (ctx->rings.dma.cs &&
 315             ctx->rings.dma.cs->cdw &&
 316             ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs,
 317                                              resource->cs_buf, rusage)) {
 318                 if (usage & PIPE_TRANSFER_DONTBLOCK) {
 319                         ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
 320                         return NULL;
 321                 } else {
 322                         ctx->rings.dma.flush(ctx, 0);
 323                 }
 324         }
 325
 326         if (ctx->ws->buffer_is_busy(resource->buf, rusage)) {
 327                 if (usage & PIPE_TRANSFER_DONTBLOCK) {
 328                         return NULL;
 329                 } else {
 330                         /* We will be wait for the GPU. Wait for any offloaded
 331                          * CS flush to complete to avoid busy-waiting in the winsys. */
 332                         ctx->ws->cs_sync_flush(ctx->rings.gfx.cs);
 333                         if (ctx->rings.dma.cs)
 334                                 ctx->ws->cs_sync_flush(ctx->rings.dma.cs);
 335                 }
 336         }
 337
 338         return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
 339 }
 340
 341 bool r600_init_resource(struct r600_common_screen *rscreen,
 342                         struct r600_resource *res,
 343                         unsigned size, unsigned alignment,
 344                         bool use_reusable_pool, unsigned usage)
 345 {
 346         uint32_t initial_domain, domains;
 347
 348         switch(usage) {
 349         case PIPE_USAGE_STAGING:
 350                 /* Staging resources participate in transfers, i.e. are used
 351                  * for uploads and downloads from regular resources.
 352                  * We generate them internally for some transfers.
 353                  */
 354                 initial_domain = RADEON_DOMAIN_GTT;
 355                 domains = RADEON_DOMAIN_GTT;
 356                 break;
 357         case PIPE_USAGE_DYNAMIC:
 358         case PIPE_USAGE_STREAM:
 359                 /* Default to GTT, but allow the memory manager to move it to VRAM. */
 360                 initial_domain = RADEON_DOMAIN_GTT;
 361                 domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
 362                 break;
 363         case PIPE_USAGE_DEFAULT:
 364         case PIPE_USAGE_STATIC:
 365         case PIPE_USAGE_IMMUTABLE:
 366         default:
 367                 /* Don't list GTT here, because the memory manager would put some
 368                  * resources to GTT no matter what the initial domain is.
 369                  * Not listing GTT in the domains improves performance a lot. */
 370                 initial_domain = RADEON_DOMAIN_VRAM;
 371                 domains = RADEON_DOMAIN_VRAM;
 372                 break;
 373         }
 374
 375         res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
 376                                               use_reusable_pool,
 377                                               initial_domain);
 378         if (!res->buf) {
 379                 return false;
 380         }
 381
 382         res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf);
 383         res->domains = domains;
 384         util_range_set_empty(&res->valid_buffer_range);
 385
 386         if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
 387                 fprintf(stderr, "VM start=0x%llX  end=0x%llX | Buffer %u bytes\n",
 388                         r600_resource_va(&rscreen->b, &res->b.b),
 389                         r600_resource_va(&rscreen->b, &res->b.b) + res->buf->size,
 390                         res->buf->size);
 391         }
 392         return true;
 393 }