src/gallium/drivers/radeon/r600_pipe_common.c

   1 /*
   2  * Copyright 2013 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors: Marek Olšák <maraeo@gmail.com>
  24  *
  25  */
  26
  27 #include "r600_pipe_common.h"
  28 #include "tgsi/tgsi_parse.h"
  29 #include "util/u_format_s3tc.h"
  30
  31 static const struct debug_named_value common_debug_options[] = {
  32         /* logging */
  33         { "texdepth", DBG_TEX_DEPTH, "Print texture depth info" },
  34         { "compute", DBG_COMPUTE, "Print compute info" },
  35         { "vm", DBG_VM, "Print virtual addresses when creating resources" },
  36         { "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" },
  37
  38         /* shaders */
  39         { "fs", DBG_FS, "Print fetch shaders" },
  40         { "vs", DBG_VS, "Print vertex shaders" },
  41         { "gs", DBG_GS, "Print geometry shaders" },
  42         { "ps", DBG_PS, "Print pixel shaders" },
  43         { "cs", DBG_CS, "Print compute shaders" },
  44
  45         DEBUG_NAMED_VALUE_END /* must be last */
  46 };
  47
  48 static bool r600_interpret_tiling(struct r600_common_screen *rscreen,
  49                                   uint32_t tiling_config)
  50 {
  51         switch ((tiling_config & 0xe) >> 1) {
  52         case 0:
  53                 rscreen->tiling_info.num_channels = 1;
  54                 break;
  55         case 1:
  56                 rscreen->tiling_info.num_channels = 2;
  57                 break;
  58         case 2:
  59                 rscreen->tiling_info.num_channels = 4;
  60                 break;
  61         case 3:
  62                 rscreen->tiling_info.num_channels = 8;
  63                 break;
  64         default:
  65                 return false;
  66         }
  67
  68         switch ((tiling_config & 0x30) >> 4) {
  69         case 0:
  70                 rscreen->tiling_info.num_banks = 4;
  71                 break;
  72         case 1:
  73                 rscreen->tiling_info.num_banks = 8;
  74                 break;
  75         default:
  76                 return false;
  77
  78         }
  79         switch ((tiling_config & 0xc0) >> 6) {
  80         case 0:
  81                 rscreen->tiling_info.group_bytes = 256;
  82                 break;
  83         case 1:
  84                 rscreen->tiling_info.group_bytes = 512;
  85                 break;
  86         default:
  87                 return false;
  88         }
  89         return true;
  90 }
  91
  92 static bool evergreen_interpret_tiling(struct r600_common_screen *rscreen,
  93                                        uint32_t tiling_config)
  94 {
  95         switch (tiling_config & 0xf) {
  96         case 0:
  97                 rscreen->tiling_info.num_channels = 1;
  98                 break;
  99         case 1:
 100                 rscreen->tiling_info.num_channels = 2;
 101                 break;
 102         case 2:
 103                 rscreen->tiling_info.num_channels = 4;
 104                 break;
 105         case 3:
 106                 rscreen->tiling_info.num_channels = 8;
 107                 break;
 108         default:
 109                 return false;
 110         }
 111
 112         switch ((tiling_config & 0xf0) >> 4) {
 113         case 0:
 114                 rscreen->tiling_info.num_banks = 4;
 115                 break;
 116         case 1:
 117                 rscreen->tiling_info.num_banks = 8;
 118                 break;
 119         case 2:
 120                 rscreen->tiling_info.num_banks = 16;
 121                 break;
 122         default:
 123                 return false;
 124         }
 125
 126         switch ((tiling_config & 0xf00) >> 8) {
 127         case 0:
 128                 rscreen->tiling_info.group_bytes = 256;
 129                 break;
 130         case 1:
 131                 rscreen->tiling_info.group_bytes = 512;
 132                 break;
 133         default:
 134                 return false;
 135         }
 136         return true;
 137 }
 138
 139 static bool r600_init_tiling(struct r600_common_screen *rscreen)
 140 {
 141         uint32_t tiling_config = rscreen->info.r600_tiling_config;
 142
 143         /* set default group bytes, overridden by tiling info ioctl */
 144         if (rscreen->chip_class <= R700) {
 145                 rscreen->tiling_info.group_bytes = 256;
 146         } else {
 147                 rscreen->tiling_info.group_bytes = 512;
 148         }
 149
 150         if (!tiling_config)
 151                 return true;
 152
 153         if (rscreen->chip_class <= R700) {
 154                 return r600_interpret_tiling(rscreen, tiling_config);
 155         } else {
 156                 return evergreen_interpret_tiling(rscreen, tiling_config);
 157         }
 158 }
 159
 160 bool r600_common_screen_init(struct r600_common_screen *rscreen,
 161                              struct radeon_winsys *ws)
 162 {
 163         ws->query_info(ws, &rscreen->info);
 164
 165         rscreen->ws = ws;
 166         rscreen->family = rscreen->info.family;
 167         rscreen->chip_class = rscreen->info.chip_class;
 168         rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
 169
 170         if (!r600_init_tiling(rscreen)) {
 171                 return false;
 172         }
 173
 174         util_format_s3tc_init();
 175
 176         /* Create the auxiliary context. */
 177         pipe_mutex_init(rscreen->aux_context_lock);
 178         rscreen->aux_context = rscreen->b.context_create(&rscreen->b, NULL);
 179         return true;
 180 }
 181
 182 void r600_common_screen_cleanup(struct r600_common_screen *rscreen)
 183 {
 184         pipe_mutex_destroy(rscreen->aux_context_lock);
 185         rscreen->aux_context->destroy(rscreen->aux_context);
 186 }
 187
 188 bool r600_common_context_init(struct r600_common_context *rctx,
 189                               struct r600_common_screen *rscreen)
 190 {
 191         rctx->ws = rscreen->ws;
 192         rctx->family = rscreen->family;
 193         rctx->chip_class = rscreen->chip_class;
 194
 195         r600_streamout_init(rctx);
 196
 197         rctx->allocator_so_filled_size = u_suballocator_create(&rctx->b, 4096, 4,
 198                                                                0, PIPE_USAGE_STATIC, TRUE);
 199         if (!rctx->allocator_so_filled_size)
 200                 return false;
 201
 202         return true;
 203 }
 204
 205 void r600_common_context_cleanup(struct r600_common_context *rctx)
 206 {
 207         if (rctx->allocator_so_filled_size) {
 208                 u_suballocator_destroy(rctx->allocator_so_filled_size);
 209         }
 210 }
 211
 212 void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
 213 {
 214         struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 215         struct r600_resource *rr = (struct r600_resource *)r;
 216
 217         if (r == NULL) {
 218                 return;
 219         }
 220
 221         /*
 222          * The idea is to compute a gross estimate of memory requirement of
 223          * each draw call. After each draw call, memory will be precisely
 224          * accounted. So the uncertainty is only on the current draw call.
 225          * In practice this gave very good estimate (+/- 10% of the target
 226          * memory limit).
 227          */
 228         if (rr->domains & RADEON_DOMAIN_GTT) {
 229                 rctx->gtt += rr->buf->size;
 230         }
 231         if (rr->domains & RADEON_DOMAIN_VRAM) {
 232                 rctx->vram += rr->buf->size;
 233         }
 234 }
 235
 236 static unsigned tgsi_get_processor_type(const struct tgsi_token *tokens)
 237 {
 238         struct tgsi_parse_context parse;
 239
 240         if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
 241                 debug_printf("tgsi_parse_init() failed in %s:%i!\n", __func__, __LINE__);
 242                 return ~0;
 243         }
 244         return parse.FullHeader.Processor.Processor;
 245 }
 246
 247 bool r600_can_dump_shader(struct r600_common_screen *rscreen,
 248                           const struct tgsi_token *tokens)
 249 {
 250         switch (tgsi_get_processor_type(tokens)) {
 251         case TGSI_PROCESSOR_VERTEX:
 252                 return (rscreen->debug_flags & DBG_VS) != 0;
 253         case TGSI_PROCESSOR_GEOMETRY:
 254                 return (rscreen->debug_flags & DBG_GS) != 0;
 255         case TGSI_PROCESSOR_FRAGMENT:
 256                 return (rscreen->debug_flags & DBG_PS) != 0;
 257         case TGSI_PROCESSOR_COMPUTE:
 258                 return (rscreen->debug_flags & DBG_CS) != 0;
 259         default:
 260                 return false;
 261         }
 262 }
 263
 264 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
 265                               unsigned offset, unsigned size, unsigned value)
 266 {
 267         struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
 268
 269         pipe_mutex_lock(rscreen->aux_context_lock);
 270         rctx->clear_buffer(&rctx->b, dst, offset, size, value);
 271         rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
 272         pipe_mutex_unlock(rscreen->aux_context_lock);
 273 }
 274
 275 boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
 276                                         struct radeon_winsys_cs_handle *buf,
 277                                         enum radeon_bo_usage usage)
 278 {
 279         if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) {
 280                 return TRUE;
 281         }
 282         if (ctx->rings.dma.cs &&
 283             ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage)) {
 284                 return TRUE;
 285         }
 286         return FALSE;
 287 }
 288
 289 void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
 290                                       struct r600_resource *resource,
 291                                       unsigned usage)
 292 {
 293         enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
 294
 295         if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
 296                 return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
 297         }
 298
 299         if (!(usage & PIPE_TRANSFER_WRITE)) {
 300                 /* have to wait for the last write */
 301                 rusage = RADEON_USAGE_WRITE;
 302         }
 303
 304         if (ctx->rings.gfx.cs->cdw &&
 305             ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs,
 306                                              resource->cs_buf, rusage)) {
 307                 if (usage & PIPE_TRANSFER_DONTBLOCK) {
 308                         ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
 309                         return NULL;
 310                 } else {
 311                         ctx->rings.gfx.flush(ctx, 0);
 312                 }
 313         }
 314         if (ctx->rings.dma.cs &&
 315             ctx->rings.dma.cs->cdw &&
 316             ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs,
 317                                              resource->cs_buf, rusage)) {
 318                 if (usage & PIPE_TRANSFER_DONTBLOCK) {
 319                         ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
 320                         return NULL;
 321                 } else {
 322                         ctx->rings.dma.flush(ctx, 0);
 323                 }
 324         }
 325
 326         if (ctx->ws->buffer_is_busy(resource->buf, rusage)) {
 327                 if (usage & PIPE_TRANSFER_DONTBLOCK) {
 328                         return NULL;
 329                 } else {
 330                         /* We will be wait for the GPU. Wait for any offloaded
 331                          * CS flush to complete to avoid busy-waiting in the winsys. */
 332                         ctx->ws->cs_sync_flush(ctx->rings.gfx.cs);
 333                         if (ctx->rings.dma.cs)
 334                                 ctx->ws->cs_sync_flush(ctx->rings.dma.cs);
 335                 }
 336         }
 337
 338         return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
 339 }