src/gallium/drivers/radeon/r600_texture.c

   1 /*
   2  * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *      Jerome Glisse
  25  *      Corbin Simpson
  26  */
  27 #include "r600_pipe_common.h"
  28 #include "r600_cs.h"
  29 #include "r600_query.h"
  30 #include "util/u_format.h"
  31 #include "util/u_memory.h"
  32 #include "util/u_pack_color.h"
  33 #include "util/u_surface.h"
  34 #include "os/os_time.h"
  35 #include <errno.h>
  36 #include <inttypes.h>
   37
      /* Forward declarations for functions that are called further down in this
       * file (r600_texture_discard_cmask is defined after its first use).
       */
   38 static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
   39                                        struct r600_texture *rtex);
   40 static enum radeon_surf_mode
   41 r600_choose_tiling(struct r600_common_screen *rscreen,
   42                    const struct pipe_resource *templ);
  43
   44
      /**
       * Decide whether a texture-to-texture copy may use the DMA path and, if
       * so, bring both textures into a state where that is valid.
       *
       * Returns false (without side effects) when the context has no DMA command
       * stream, when the two surfaces differ in bytes per element, when either
       * texture is multisampled or depth, or when DCC covers the source or the
       * destination level.
       *
       * If the destination level has dirty CMASK, the copy is accepted only when
       * it covers the whole level; the destination CMASK is then discarded.
       * If the source level has dirty CMASK, flush_resource is called on the
       * source before returning true.
       *
       * \param src_box  region of src_level to copy; its width/height/depth are
       *                 also the extent written at (dstx, dsty, dstz)
       * \return true if the caller may proceed with the DMA copy
       */
   45 bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
   46                                struct r600_texture *rdst,
   47                                unsigned dst_level, unsigned dstx,
   48                                unsigned dsty, unsigned dstz,
   49                                struct r600_texture *rsrc,
   50                                unsigned src_level,
   51                                const struct pipe_box *src_box)
   52 {
   53         if (!rctx->dma.cs)
   54                 return false;
   55
   56         if (rdst->surface.bpe != rsrc->surface.bpe)
   57                 return false;
   58
   59         /* MSAA: Blits don't exist in the real world. */
   60         if (rsrc->resource.b.b.nr_samples > 1 ||
   61             rdst->resource.b.b.nr_samples > 1)
   62                 return false;
   63
   64         /* Depth-stencil surfaces:
   65          *   When dst is linear, the DB->CB copy preserves HTILE.
   66          *   When dst is tiled, the 3D path must be used to update HTILE.
   67          */
   68         if (rsrc->is_depth || rdst->is_depth)
   69                 return false;
   70
   71         /* DCC as:
   72          *   src: Use the 3D path. DCC decompression is expensive.
   73          *   dst: Use the 3D path to compress the pixels with DCC.
   74          */
   75         if ((rsrc->dcc_offset && src_level < rsrc->surface.num_dcc_levels) ||
   76             (rdst->dcc_offset && dst_level < rdst->surface.num_dcc_levels))
   77                 return false;
   78
   79         /* CMASK as:
   80          *   src: Both texture and SDMA paths need decompression. Use SDMA.
   81          *   dst: If overwriting the whole texture, discard CMASK and use
   82          *        SDMA. Otherwise, use the 3D path.
   83          */
   84         if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) {
   85                 /* The CMASK clear is only enabled for the first level. */
   86                 assert(dst_level == 0);
   87                 if (!util_texrange_covers_whole_level(&rdst->resource.b.b, dst_level,
   88                                                       dstx, dsty, dstz, src_box->width,
   89                                                       src_box->height, src_box->depth))
   90                         return false;
   91
   92                 r600_texture_discard_cmask(rctx->screen, rdst);
   93         }
   94
   95         /* All requirements are met. Prepare textures for SDMA. */
   96         if (rsrc->cmask.size && rsrc->dirty_level_mask & (1 << src_level))
   97                 rctx->b.flush_resource(&rctx->b, &rsrc->resource.b.b);
   98
              /* Both levels are expected to be clean at this point. */
   99         assert(!(rsrc->dirty_level_mask & (1 << src_level)));
  100         assert(!(rdst->dirty_level_mask & (1 << dst_level)));
  101
  102         return true;
  103 }
 104
 105 /* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
 106 static void r600_copy_region_with_blit(struct pipe_context *pipe,
 107                                        struct pipe_resource *dst,
 108                                        unsigned dst_level,
 109                                        unsigned dstx, unsigned dsty, unsigned dstz,
 110                                        struct pipe_resource *src,
 111                                        unsigned src_level,
 112                                        const struct pipe_box *src_box)
 113 {
 114         struct pipe_blit_info blit;
 115
 116         memset(&blit, 0, sizeof(blit));
 117         blit.src.resource = src;
 118         blit.src.format = src->format;
 119         blit.src.level = src_level;
 120         blit.src.box = *src_box;
 121         blit.dst.resource = dst;
 122         blit.dst.format = dst->format;
 123         blit.dst.level = dst_level;
 124         blit.dst.box.x = dstx;
 125         blit.dst.box.y = dsty;
 126         blit.dst.box.z = dstz;
 127         blit.dst.box.width = src_box->width;
 128         blit.dst.box.height = src_box->height;
 129         blit.dst.box.depth = src_box->depth;
 130         blit.mask = util_format_get_mask(src->format) &
 131                     util_format_get_mask(dst->format);
 132         blit.filter = PIPE_TEX_FILTER_NEAREST;
 133
 134         if (blit.mask) {
 135                 pipe->blit(pipe, &blit);
 136         }
 137 }
 138
 139 /* Copy from a full GPU texture to a transfer's staging one. */
 140 static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
 141 {
 142         struct r600_common_context *rctx = (struct r600_common_context*)ctx;
 143         struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
 144         struct pipe_resource *dst = &rtransfer->staging->b.b;
 145         struct pipe_resource *src = transfer->resource;
 146
 147         if (src->nr_samples > 1) {
 148                 r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0,
 149                                            src, transfer->level, &transfer->box);
 150                 return;
 151         }
 152
 153         rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
 154                        &transfer->box);
 155 }
 156
 157 /* Copy from a transfer's staging texture to a full GPU one. */
 158 static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
 159 {
 160         struct r600_common_context *rctx = (struct r600_common_context*)ctx;
 161         struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
 162         struct pipe_resource *dst = transfer->resource;
 163         struct pipe_resource *src = &rtransfer->staging->b.b;
 164         struct pipe_box sbox;
 165
 166         u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);
 167
 168         if (dst->nr_samples > 1) {
 169                 r600_copy_region_with_blit(ctx, dst, transfer->level,
 170                                            transfer->box.x, transfer->box.y, transfer->box.z,
 171                                            src, 0, &sbox);
 172                 return;
 173         }
 174
 175         rctx->dma_copy(ctx, dst, transfer->level,
 176                        transfer->box.x, transfer->box.y, transfer->box.z,
 177                        src, 0, &sbox);
 178 }
  179
      /* Return the byte offset, within the texture's storage, of the block that
       * contains (box->x, box->y) in slice box->z of mip level `level`:
       *
       *   level offset + z * slice_size +
       *   (y / blk_h * nblk_x + x / blk_w) * bpe
       *
       * Only box->x, box->y and box->z are read; the box extent is ignored.
       * NOTE(review): the result is returned as `unsigned`; if the legacy level
       * offset or slice_size fields are wider than that, large offsets would be
       * truncated -- confirm against the radeon_surf definition.
       */
  180 static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level,
  181                                         const struct pipe_box *box)
  182 {
  183         return rtex->surface.u.legacy.level[level].offset +
  184                box->z * rtex->surface.u.legacy.level[level].slice_size +
  185                (box->y / rtex->surface.blk_h *
  186                 rtex->surface.u.legacy.level[level].nblk_x +
  187                 box->x / rtex->surface.blk_w) * rtex->surface.bpe;
  188 }
  189
      /**
       * Compute the winsys surface layout for a texture.
       *
       * Derives the bytes-per-element value and the RADEON_SURF_* flags from
       * ptex and the boolean arguments, calls ws->surface_init(), then applies
       * the optional pitch override to level 0 and adds `offset` to the offset
       * of every legacy level.
       *
       * \param surface                  receives the computed layout
       * \param ptex                     texture template (format, bind, flags, ...)
       * \param array_mode               tiling mode passed through to surface_init
       * \param pitch_in_bytes_override  if non-zero and different from the computed
       *                                 level-0 pitch, replaces level 0's nblk_x
       *                                 and slice_size
       * \param offset                   added to the offset of every legacy level
       * \param is_imported              sets RADEON_SURF_IMPORTED
       * \param is_scanout               sets RADEON_SURF_SCANOUT (also set when
       *                                 ptex->bind has PIPE_BIND_SCANOUT)
       * \param is_flushed_depth         suppresses the Z/S buffer flags and the
       *                                 evergreen Z32_FLOAT_S8X24 bpe special case
       * \param tc_compatible_htile      for depth formats with 2D array mode,
       *                                 sets RADEON_SURF_TC_COMPATIBLE_HTILE
       * \return 0 on success, otherwise the non-zero result of surface_init
       */
  190 static int r600_init_surface(struct r600_common_screen *rscreen,
  191                              struct radeon_surf *surface,
  192                              const struct pipe_resource *ptex,
  193                              enum radeon_surf_mode array_mode,
  194                              unsigned pitch_in_bytes_override,
  195                              unsigned offset,
  196                              bool is_imported,
  197                              bool is_scanout,
  198                              bool is_flushed_depth,
  199                              bool tc_compatible_htile)
  200 {
  201         const struct util_format_description *desc =
  202                 util_format_description(ptex->format);
  203         bool is_depth, is_stencil;
  204         int r;
  205         unsigned i, bpe, flags = 0;
  206
  207         is_depth = util_format_has_depth(desc);
  208         is_stencil = util_format_has_stencil(desc);
  209
  210         if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
  211             ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
  212                 bpe = 4; /* stencil is allocated separately on evergreen */
  213         } else {
  214                 bpe = util_format_get_blocksize(ptex->format);
  215                 /* align byte per element on dword */
  216                 if (bpe == 3) {
  217                         bpe = 4;
  218                 }
  219         }
  220
  221         if (!is_flushed_depth && is_depth) {
  222                 flags |= RADEON_SURF_ZBUFFER;
  223
  224                 if (tc_compatible_htile &&
  225                     array_mode == RADEON_SURF_MODE_2D) {
  226                         /* TC-compatible HTILE only supports Z32_FLOAT.
  227                          * GFX9 also supports Z16_UNORM.
  228                          * On VI, promote Z16 to Z32. DB->CB copies will convert
  229                          * the format for transfers.
  230                          */
  231                         if (rscreen->chip_class == VI)
  232                                 bpe = 4;
  233
  234                         flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
  235                 }
  236
  237                 if (is_stencil)
  238                         flags |= RADEON_SURF_SBUFFER;
  239         }
  240
              /* DCC is turned off on VI+ when the resource asks for it or for
               * the R9G9B9E5_FLOAT format.
               */
  241         if (rscreen->chip_class >= VI &&
  242             (ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC ||
  243              ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT))
  244                 flags |= RADEON_SURF_DISABLE_DCC;
  245
  246         if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) {
  247                 /* This should catch bugs in gallium users setting incorrect flags. */
  248                 assert(ptex->nr_samples <= 1 &&
  249                        ptex->array_size == 1 &&
  250                        ptex->depth0 == 1 &&
  251                        ptex->last_level == 0 &&
  252                        !(flags & RADEON_SURF_Z_OR_SBUFFER));
  253
  254                 flags |= RADEON_SURF_SCANOUT;
  255         }
  256
  257         if (is_imported)
  258                 flags |= RADEON_SURF_IMPORTED;
  259         if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
  260                 flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
  261
  262         r = rscreen->ws->surface_init(rscreen->ws, ptex, flags, bpe,
  263                                       array_mode, surface);
  264         if (r) {
  265                 return r;
  266         }
  267
  268         if (pitch_in_bytes_override &&
  269             pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) {
  270                 /* old ddx on evergreen over estimate alignment for 1d, only 1 level
  271                  * for those
  272                  */
  273                 surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / bpe;
  274                 surface->u.legacy.level[0].slice_size = pitch_in_bytes_override * surface->u.legacy.level[0].nblk_y;
  275         }
  276
              /* Shift every level by the caller-supplied base offset. */
  277         if (offset) {
  278                 for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
  279                         surface->u.legacy.level[i].offset += offset;
  280         }
  281         return 0;
  282 }
 283
 284 static void r600_texture_init_metadata(struct r600_common_screen *rscreen,
 285                                        struct r600_texture *rtex,
 286                                        struct radeon_bo_metadata *metadata)
 287 {
 288         struct radeon_surf *surface = &rtex->surface;
 289
 290         memset(metadata, 0, sizeof(*metadata));
 291
 292         if (rscreen->chip_class >= GFX9) {
 293         } else {
 294                 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
 295                                            RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
 296                 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
 297                                            RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
 298                 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
 299                 metadata->u.legacy.bankw = surface->u.legacy.bankw;
 300                 metadata->u.legacy.bankh = surface->u.legacy.bankh;
 301                 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
 302                 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
 303                 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
 304                 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
 305                 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
 306         }
 307 }
 308
 309 static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx,
 310                                             struct r600_texture *rtex)
 311 {
 312         struct r600_common_screen *rscreen = rctx->screen;
 313         struct pipe_context *ctx = &rctx->b;
 314
 315         if (ctx == rscreen->aux_context)
 316                 mtx_lock(&rscreen->aux_context_lock);
 317
 318         ctx->flush_resource(ctx, &rtex->resource.b.b);
 319         ctx->flush(ctx, NULL, 0);
 320
 321         if (ctx == rscreen->aux_context)
 322                 mtx_unlock(&rscreen->aux_context_lock);
 323 }
  324
      /* Stop using CMASK for the texture. Does nothing when cmask.size is zero.
       * Otherwise: zeroes the cmask info, points base_address_reg at the
       * texture's own GPU address (>> 8), clears dirty_level_mask, clears the
       * FAST_CLEAR bit in cb_color_info, drops the reference to a cmask buffer
       * that is not the texture's own resource, and bumps the screen-wide
       * counters.
       */
  325 static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
  326                                        struct r600_texture *rtex)
  327 {
  328         if (!rtex->cmask.size)
  329                 return;
  330
  331         assert(rtex->resource.b.b.nr_samples <= 1);
  332
  333         /* Disable CMASK. */
  334         memset(&rtex->cmask, 0, sizeof(rtex->cmask));
  335         rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
  336         rtex->dirty_level_mask = 0;
  337
              /* The FAST_CLEAR field macro differs between SI+ and older chips. */
  338         if (rscreen->chip_class >= SI)
  339                 rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1);
  340         else
  341                 rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);
  342
              /* cmask_buffer can point at the texture's own resource; only a
               * different buffer is unreferenced here.
               */
  343         if (rtex->cmask_buffer != &rtex->resource)
  344             r600_resource_reference(&rtex->cmask_buffer, NULL);
  345
  346         /* Notify all contexts about the change. */
  347         p_atomic_inc(&rscreen->dirty_tex_counter);
  348         p_atomic_inc(&rscreen->compressed_colortex_counter);
  349 }
 350
 351 static bool r600_can_disable_dcc(struct r600_texture *rtex)
 352 {
 353         /* We can't disable DCC if it can be written by another process. */
 354         return rtex->dcc_offset &&
 355                (!rtex->resource.is_shared ||
 356                 !(rtex->resource.external_usage & PIPE_HANDLE_USAGE_WRITE));
 357 }
 358
 359 static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen,
 360                                      struct r600_texture *rtex)
 361 {
 362         if (!r600_can_disable_dcc(rtex))
 363                 return false;
 364
 365         assert(rtex->dcc_separate_buffer == NULL);
 366
 367         /* Disable DCC. */
 368         rtex->dcc_offset = 0;
 369
 370         /* Notify all contexts about the change. */
 371         p_atomic_inc(&rscreen->dirty_tex_counter);
 372         return true;
 373 }
 374
 375 /**
 376  * Disable DCC for the texture. (first decompress, then discard metadata).
 377  *
 378  * There is unresolved multi-context synchronization issue between
 379  * screen::aux_context and the current context. If applications do this with
 380  * multiple contexts, it's already undefined behavior for them and we don't
 381  * have to worry about that. The scenario is:
 382  *
 383  * If context 1 disables DCC and context 2 has queued commands that write
 384  * to the texture via CB with DCC enabled, and the order of operations is
 385  * as follows:
 386  *   context 2 queues draw calls rendering to the texture, but doesn't flush
 387  *   context 1 disables DCC and flushes
 388  *   context 1 & 2 reset descriptors and FB state
 389  *   context 2 flushes (new compressed tiles written by the draw calls)
 390  *   context 1 & 2 read garbage, because DCC is disabled, yet there are
 391  *   compressed tiled
 392  *
 393  * \param rctx  the current context if you have one, or rscreen->aux_context
 394  *              if you don't.
 395  */
 396 bool r600_texture_disable_dcc(struct r600_common_context *rctx,
 397                               struct r600_texture *rtex)
 398 {
 399         struct r600_common_screen *rscreen = rctx->screen;
 400
 401         if (!r600_can_disable_dcc(rtex))
 402                 return false;
 403
 404         if (&rctx->b == rscreen->aux_context)
 405                 mtx_lock(&rscreen->aux_context_lock);
 406
 407         /* Decompress DCC. */
 408         rctx->decompress_dcc(&rctx->b, rtex);
 409         rctx->b.flush(&rctx->b, NULL, 0);
 410
 411         if (&rctx->b == rscreen->aux_context)
 412                 mtx_unlock(&rscreen->aux_context_lock);
 413
 414         return r600_texture_discard_dcc(rscreen, rtex);
 415 }
  416
      /**
       * Replace the storage of a tiled texture with a newly created linear one.
       *
       * A copy of the texture's template with PIPE_BIND_LINEAR added is used to
       * create a new texture; unless invalidate_storage is set, every mip level
       * is copied into it with dma_copy. CMASK and DCC of the old texture are
       * discarded, then the buffer, addresses, sizes and layout fields of the
       * new texture are moved into rtex and the temporary texture is released.
       *
       * The function returns without doing anything when the chip is older than
       * SI, when the texture is shared or already linear, when
       * r600_choose_tiling() does not pick RADEON_SURF_MODE_LINEAR_ALIGNED for
       * the modified template, or when the new texture cannot be created.
       *
       * \param invalidate_storage  if true, the old contents are not copied
       */
  417 static void r600_degrade_tile_mode_to_linear(struct r600_common_context *rctx,
  418                                              struct r600_texture *rtex,
  419                                              bool invalidate_storage)
  420 {
  421         struct pipe_screen *screen = rctx->b.screen;
  422         struct r600_texture *new_tex;
  423         struct pipe_resource templ = rtex->resource.b.b;
  424         unsigned i;
  425
  426         templ.bind |= PIPE_BIND_LINEAR;
  427
  428         /* r600g doesn't react to dirty_tex_descriptor_counter */
  429         if (rctx->chip_class < SI)
  430                 return;
  431
  432         if (rtex->resource.is_shared ||
  433             rtex->surface.is_linear)
  434                 return;
  435
  436         /* This fails with MSAA, depth, and compressed textures. */
  437         if (r600_choose_tiling(rctx->screen, &templ) !=
  438             RADEON_SURF_MODE_LINEAR_ALIGNED)
  439                 return;
  440
  441         new_tex = (struct r600_texture*)screen->resource_create(screen, &templ);
  442         if (!new_tex)
  443                 return;
  444
  445         /* Copy the pixels to the new texture. */
  446         if (!invalidate_storage) {
  447                 for (i = 0; i <= templ.last_level; i++) {
  448                         struct pipe_box box;
  449
                              /* Whole level i: minified width/height, all layers. */
  450                         u_box_3d(0, 0, 0,
  451                                  u_minify(templ.width0, i), u_minify(templ.height0, i),
  452                                  util_max_layer(&templ, i) + 1, &box);
  453
  454                         rctx->dma_copy(&rctx->b, &new_tex->resource.b.b, i, 0, 0, 0,
  455                                        &rtex->resource.b.b, i, &box);
  456                 }
  457         }
  458
  459         r600_texture_discard_cmask(rctx->screen, rtex);
  460         r600_texture_discard_dcc(rctx->screen, rtex);
  461
  462         /* Replace the structure fields of rtex. */
  463         rtex->resource.b.b.bind = templ.bind;
  464         pb_reference(&rtex->resource.buf, new_tex->resource.buf);
  465         rtex->resource.gpu_address = new_tex->resource.gpu_address;
  466         rtex->resource.vram_usage = new_tex->resource.vram_usage;
  467         rtex->resource.gart_usage = new_tex->resource.gart_usage;
  468         rtex->resource.bo_size = new_tex->resource.bo_size;
  469         rtex->resource.bo_alignment = new_tex->resource.bo_alignment;
  470         rtex->resource.domains = new_tex->resource.domains;
  471         rtex->resource.flags = new_tex->resource.flags;
  472         rtex->size = new_tex->size;
  473         rtex->surface = new_tex->surface;
  474         rtex->non_disp_tiling = new_tex->non_disp_tiling;
  475         rtex->cb_color_info = new_tex->cb_color_info;
  476         rtex->cmask = new_tex->cmask; /* needed even without CMASK */
  477
              /* Sanity checks on the state rtex is left in after the swap. */
  478         assert(!rtex->htile_buffer);
  479         assert(!rtex->cmask.size);
  480         assert(!rtex->fmask.size);
  481         assert(!rtex->dcc_offset);
  482         assert(!rtex->is_depth);
  483
              /* Release the temporary texture object. */
  484         r600_texture_reference(&new_tex, NULL);
  485
  486         p_atomic_inc(&rctx->screen->dirty_tex_counter);
  487 }
  488
      /**
       * Export a handle for a resource through ws->buffer_get_handle().
       *
       * Multisampled and depth textures are rejected. For other textures:
       *  - DCC is disabled when the handle is requested with
       *    PIPE_HANDLE_USAGE_WRITE and the texture has DCC;
       *  - unless PIPE_HANDLE_USAGE_EXPLICIT_FLUSH is set, fast clears are
       *    eliminated and CMASK is discarded;
       *  - BO metadata is (re)written on the first export, or when DCC was just
       *    disabled.
       * Finally the resource is marked shared and external_usage is updated.
       *
       * \param ctx    context to use for decompression/flushes; when NULL the
       *               screen's aux_context is used instead
       * \param usage  PIPE_HANDLE_USAGE_* bits requested by the caller
       * \return false for unsupported textures, otherwise the result of
       *         ws->buffer_get_handle()
       */
  489 static boolean r600_texture_get_handle(struct pipe_screen* screen,
  490                                        struct pipe_context *ctx,
  491                                        struct pipe_resource *resource,
  492                                        struct winsys_handle *whandle,
  493                                        unsigned usage)
  494 {
  495         struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
  496         struct r600_common_context *rctx = (struct r600_common_context*)
  497                                            (ctx ? ctx : rscreen->aux_context);
  498         struct r600_resource *res = (struct r600_resource*)resource;
  499         struct r600_texture *rtex = (struct r600_texture*)resource;
  500         struct radeon_bo_metadata metadata;
  501         bool update_metadata = false;
  502
  503         /* This is not supported now, but it might be required for OpenCL
  504          * interop in the future.
  505          */
  506         if (resource->target != PIPE_BUFFER &&
  507             (resource->nr_samples > 1 || rtex->is_depth))
  508                 return false;
  509
  510         if (resource->target != PIPE_BUFFER) {
  511                 /* Since shader image stores don't support DCC on VI,
  512                  * disable it for external clients that want write
  513                  * access.
  514                  */
  515                 if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
  516                         if (r600_texture_disable_dcc(rctx, rtex))
  517                                 update_metadata = true;
  518                 }
  519
  520                 if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
  521                     (rtex->cmask.size || rtex->dcc_offset)) {
  522                         /* Eliminate fast clear (both CMASK and DCC) */
  523                         r600_eliminate_fast_color_clear(rctx, rtex);
  524
  525                         /* Disable CMASK if flush_resource isn't going
  526                          * to be called.
  527                          */
  528                         if (rtex->cmask.size)
  529                                 r600_texture_discard_cmask(rscreen, rtex);
  530                 }
  531
  532                 /* Set metadata. */
  533                 if (!res->is_shared || update_metadata) {
  534                         r600_texture_init_metadata(rscreen, rtex, &metadata);
  535                         if (rscreen->query_opaque_metadata)
  536                                 rscreen->query_opaque_metadata(rscreen, rtex,
  537                                                                &metadata);
  538
  539                         rscreen->ws->buffer_set_metadata(res->buf, &metadata);
  540                 }
  541         }
  542
  543         if (res->is_shared) {
  544                 /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
  545                  * doesn't set it.
  546                  */
  547                 res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
  548                 if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
  549                         res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
  550         } else {
  551                 res->is_shared = true;
  552                 res->external_usage = usage;
  553         }
  554
              /* Export with level 0's pitch in bytes, offset and slice size.
               * NOTE(review): this reads rtex->surface even when the resource is
               * a PIPE_BUFFER, although the texture-specific code above is
               * guarded by target != PIPE_BUFFER -- confirm that buffers can
               * safely be treated as r600_texture here.
               */
  555         return rscreen->ws->buffer_get_handle(res->buf,
  556                                               rtex->surface.u.legacy.level[0].nblk_x *
  557                                               rtex->surface.bpe,
  558                                               rtex->surface.u.legacy.level[0].offset,
  559                                               rtex->surface.u.legacy.level[0].slice_size,
  560                                               whandle);
  561 }
 562
 563 static void r600_texture_destroy(struct pipe_screen *screen,
 564                                  struct pipe_resource *ptex)
 565 {
 566         struct r600_texture *rtex = (struct r600_texture*)ptex;
 567         struct r600_resource *resource = &rtex->resource;
 568
 569         r600_texture_reference(&rtex->flushed_depth_texture, NULL);
 570
 571         r600_resource_reference(&rtex->htile_buffer, NULL);
 572         if (rtex->cmask_buffer != &rtex->resource) {
 573             r600_resource_reference(&rtex->cmask_buffer, NULL);
 574         }
 575         pb_reference(&resource->buf, NULL);
 576         r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
 577         r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
 578         FREE(rtex);
 579 }
  580
      /* Forward declaration of the texture resource vtable. */
  581 static const struct u_resource_vtbl r600_texture_vtbl;
  582
  583 /* The number of samples can be specified independently of the texture.
       *
       * Computes the FMASK layout for rtex with nr_samples samples by running
       * ws->surface_init() in 2D mode on a single-sample copy of the texture
       * template, with RADEON_SURF_FMASK added to the texture's surface flags and
       * a bpe chosen from the sample count, then fills *out from the result.
       *
       * *out is zeroed first and stays zeroed when nr_samples is not 2, 4 or 8,
       * or when surface_init fails; both cases are reported through R600_ERR.
       */
  584 void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
  585                                  struct r600_texture *rtex,
  586                                  unsigned nr_samples,
  587                                  struct r600_fmask_info *out)
  588 {
  589         /* FMASK is allocated like an ordinary texture. */
  590         struct pipe_resource templ = rtex->resource.b.b;
  591         struct radeon_surf fmask = {};
  592         unsigned flags, bpe;
  593
  594         memset(out, 0, sizeof(*out));
  595
  596         templ.nr_samples = 1;
  597         flags = rtex->surface.flags | RADEON_SURF_FMASK;
  598
  599         if (rscreen->chip_class <= CAYMAN) {
  600                 /* Use the same parameters and tile mode. */
  601                 fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw;
  602                 fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh;
  603                 fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea;
  604                 fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split;
  605
                      /* For up to 4 samples the bank height is forced to 4. */
  606                 if (nr_samples <= 4)
  607                         fmask.u.legacy.bankh = 4;
  608         }
  609
              /* Bytes per element of the FMASK surface for each sample count. */
  610         switch (nr_samples) {
  611         case 2:
  612         case 4:
  613                 bpe = 1;
  614                 break;
  615         case 8:
  616                 bpe = 4;
  617                 break;
  618         default:
  619                 R600_ERR("Invalid sample count for FMASK allocation.\n");
  620                 return;
  621         }
  622
  623         /* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
  624          * This can be fixed by writing a separate FMASK allocator specifically
  625          * for R600-R700 asics. */
  626         if (rscreen->chip_class <= R700) {
  627                 bpe *= 2;
  628         }
  629
  630         if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe,
  631                                       RADEON_SURF_MODE_2D, &fmask)) {
  632                 R600_ERR("Got error in surface_init while allocating FMASK.\n");
  633                 return;
  634         }
  635
  636         assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
  637
              /* Level-0 block count divided by 64, minus one (kept at 0 if the
               * quotient is already 0).
               */
  638         out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
  639         if (out->slice_tile_max)
  640                 out->slice_tile_max -= 1;
  641
  642         out->tile_mode_index = fmask.u.legacy.tiling_index[0];
  643         out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
  644         out->bank_height = fmask.u.legacy.bankh;
  645         out->alignment = MAX2(256, fmask.surf_alignment);
  646         out->size = fmask.surf_size;
  647 }
 648
 649 static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
 650                                         struct r600_texture *rtex)
 651 {
 652         r600_texture_get_fmask_info(rscreen, rtex,
 653                                     rtex->resource.b.b.nr_samples, &rtex->fmask);
 654
 655         rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment);
 656         rtex->size = rtex->fmask.offset + rtex->fmask.size;
 657 }
 658
 659 void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
 660                                  struct r600_texture *rtex,
 661                                  struct r600_cmask_info *out)
 662 {
 663         unsigned cmask_tile_width = 8;
 664         unsigned cmask_tile_height = 8;
 665         unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
 666         unsigned element_bits = 4;
 667         unsigned cmask_cache_bits = 1024;
 668         unsigned num_pipes = rscreen->info.num_tile_pipes;
 669         unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
 670
 671         unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
 672         unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
 673         unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
 674         unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
 675         unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
 676
 677         unsigned pitch_elements = align(rtex->resource.b.b.width0, macro_tile_width);
 678         unsigned height = align(rtex->resource.b.b.height0, macro_tile_height);
 679
 680         unsigned base_align = num_pipes * pipe_interleave_bytes;
 681         unsigned slice_bytes =
 682                 ((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;
 683
 684         assert(macro_tile_width % 128 == 0);
 685         assert(macro_tile_height % 128 == 0);
 686
 687         out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
 688         out->alignment = MAX2(256, base_align);
 689         out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
 690                     align(slice_bytes, base_align);
 691 }
 692
 693 static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
 694                                       struct r600_texture *rtex,
 695                                       struct r600_cmask_info *out)
 696 {
 697         unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
 698         unsigned num_pipes = rscreen->info.num_tile_pipes;
 699         unsigned cl_width, cl_height;
 700
 701         switch (num_pipes) {
 702         case 2:
 703                 cl_width = 32;
 704                 cl_height = 16;
 705                 break;
 706         case 4:
 707                 cl_width = 32;
 708                 cl_height = 32;
 709                 break;
 710         case 8:
 711                 cl_width = 64;
 712                 cl_height = 32;
 713                 break;
 714         case 16: /* Hawaii */
 715                 cl_width = 64;
 716                 cl_height = 64;
 717                 break;
 718         default:
 719                 assert(0);
 720                 return;
 721         }
 722
 723         unsigned base_align = num_pipes * pipe_interleave_bytes;
 724
 725         unsigned width = align(rtex->resource.b.b.width0, cl_width*8);
 726         unsigned height = align(rtex->resource.b.b.height0, cl_height*8);
 727         unsigned slice_elements = (width * height) / (8*8);
 728
 729         /* Each element of CMASK is a nibble. */
 730         unsigned slice_bytes = slice_elements / 2;
 731
 732         out->slice_tile_max = (width * height) / (128*128);
 733         if (out->slice_tile_max)
 734                 out->slice_tile_max -= 1;
 735
 736         out->alignment = MAX2(256, base_align);
 737         out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
 738                     align(slice_bytes, base_align);
 739 }
 740
 741 static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
 742                                         struct r600_texture *rtex)
 743 {
 744         if (rscreen->chip_class >= SI) {
 745                 si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
 746         } else {
 747                 r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
 748         }
 749
 750         rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
 751         rtex->size = rtex->cmask.offset + rtex->cmask.size;
 752
 753         if (rscreen->chip_class >= SI)
 754                 rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
 755         else
 756                 rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
 757 }
 758
 759 static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
 760                                               struct r600_texture *rtex)
 761 {
 762         if (rtex->cmask_buffer)
 763                 return;
 764
 765         assert(rtex->cmask.size == 0);
 766
 767         if (rscreen->chip_class >= SI) {
 768                 si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
 769         } else {
 770                 r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
 771         }
 772
 773         rtex->cmask_buffer = (struct r600_resource *)
 774                 r600_aligned_buffer_create(&rscreen->b,
 775                                            R600_RESOURCE_FLAG_UNMAPPABLE,
 776                                            PIPE_USAGE_DEFAULT,
 777                                            rtex->cmask.size,
 778                                            rtex->cmask.alignment);
 779         if (rtex->cmask_buffer == NULL) {
 780                 rtex->cmask.size = 0;
 781                 return;
 782         }
 783
 784         /* update colorbuffer state bits */
 785         rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
 786
 787         if (rscreen->chip_class >= SI)
 788                 rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
 789         else
 790                 rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
 791
 792         p_atomic_inc(&rscreen->compressed_colortex_counter);
 793 }
 794
 795 static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
 796                                         struct r600_texture *rtex)
 797 {
 798         unsigned cl_width, cl_height, width, height;
 799         unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
 800         unsigned num_pipes = rscreen->info.num_tile_pipes;
 801
 802         rtex->surface.htile_size = 0;
 803
 804         if (rscreen->chip_class <= EVERGREEN &&
 805             rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
 806                 return;
 807
 808         /* HW bug on R6xx. */
 809         if (rscreen->chip_class == R600 &&
 810             (rtex->resource.b.b.width0 > 7680 ||
 811              rtex->resource.b.b.height0 > 7680))
 812                 return;
 813
 814         /* HTILE is broken with 1D tiling on old kernels and CIK. */
 815         if (rscreen->chip_class >= CIK &&
 816             rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
 817             rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
 818                 return;
 819
 820         /* Overalign HTILE on P2 configs to work around GPU hangs in
 821          * piglit/depthstencil-render-miplevels 585.
 822          *
 823          * This has been confirmed to help Kabini & Stoney, where the hangs
 824          * are always reproducible. I think I have seen the test hang
 825          * on Carrizo too, though it was very rare there.
 826          */
 827         if (rscreen->chip_class >= CIK && num_pipes < 4)
 828                 num_pipes = 4;
 829
 830         switch (num_pipes) {
 831         case 1:
 832                 cl_width = 32;
 833                 cl_height = 16;
 834                 break;
 835         case 2:
 836                 cl_width = 32;
 837                 cl_height = 32;
 838                 break;
 839         case 4:
 840                 cl_width = 64;
 841                 cl_height = 32;
 842                 break;
 843         case 8:
 844                 cl_width = 64;
 845                 cl_height = 64;
 846                 break;
 847         case 16:
 848                 cl_width = 128;
 849                 cl_height = 64;
 850                 break;
 851         default:
 852                 assert(0);
 853                 return;
 854         }
 855
 856         width = align(rtex->resource.b.b.width0, cl_width * 8);
 857         height = align(rtex->resource.b.b.height0, cl_height * 8);
 858
 859         slice_elements = (width * height) / (8 * 8);
 860         slice_bytes = slice_elements * 4;
 861
 862         pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
 863         base_align = num_pipes * pipe_interleave_bytes;
 864
 865         rtex->surface.htile_alignment = base_align;
 866         rtex->surface.htile_size =
 867                 (util_max_layer(&rtex->resource.b.b, 0) + 1) *
 868                 align(slice_bytes, base_align);
 869 }
 870
 871 static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
 872                                         struct r600_texture *rtex)
 873 {
 874         uint32_t clear_value;
 875
 876         if (rtex->tc_compatible_htile) {
 877                 clear_value = 0x0000030F;
 878         } else {
 879                 r600_texture_get_htile_size(rscreen, rtex);
 880                 clear_value = 0;
 881         }
 882
 883         if (!rtex->surface.htile_size)
 884                 return;
 885
 886         rtex->htile_buffer = (struct r600_resource*)
 887                 r600_aligned_buffer_create(&rscreen->b,
 888                                            R600_RESOURCE_FLAG_UNMAPPABLE,
 889                                            PIPE_USAGE_DEFAULT,
 890                                            rtex->surface.htile_size,
 891                                            rtex->surface.htile_alignment);
 892         if (rtex->htile_buffer == NULL) {
 893                 /* this is not a fatal error as we can still keep rendering
 894                  * without htile buffer */
 895                 R600_ERR("Failed to create buffer object for htile buffer.\n");
 896         } else {
 897                 r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
 898                                          0, rtex->surface.htile_size,
 899                                          clear_value);
 900         }
 901 }
 902
 903 void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
 904 {
 905         int i;
 906
 907         fprintf(f, "  Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
 908                 "blk_h=%u, array_size=%u, last_level=%u, "
 909                 "bpe=%u, nsamples=%u, flags=0x%x, %s\n",
 910                 rtex->resource.b.b.width0, rtex->resource.b.b.height0,
 911                 rtex->resource.b.b.depth0, rtex->surface.blk_w,
 912                 rtex->surface.blk_h,
 913                 rtex->resource.b.b.array_size, rtex->resource.b.b.last_level,
 914                 rtex->surface.bpe, rtex->resource.b.b.nr_samples,
 915                 rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format));
 916
 917         fprintf(f, "  Layout: size=%"PRIu64", alignment=%u, bankw=%u, "
 918                 "bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
 919                 rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw,
 920                 rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea,
 921                 rtex->surface.u.legacy.tile_split, rtex->surface.u.legacy.pipe_config,
 922                 (rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);
 923
 924         if (rtex->fmask.size)
 925                 fprintf(f, "  FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
 926                         "bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
 927                         rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
 928                         rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
 929                         rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);
 930
 931         if (rtex->cmask.size)
 932                 fprintf(f, "  CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
 933                         "slice_tile_max=%u\n",
 934                         rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
 935                         rtex->cmask.slice_tile_max);
 936
 937         if (rtex->htile_buffer)
 938                 fprintf(f, "  HTile: size=%u, alignment=%u, TC_compatible = %u\n",
 939                         rtex->htile_buffer->b.b.width0,
 940                         rtex->htile_buffer->buf->alignment,
 941                         rtex->tc_compatible_htile);
 942
 943         if (rtex->dcc_offset) {
 944                 fprintf(f, "  DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%u\n",
 945                         rtex->dcc_offset, rtex->surface.dcc_size,
 946                         rtex->surface.dcc_alignment);
 947                 for (i = 0; i <= rtex->resource.b.b.last_level; i++)
 948                         fprintf(f, "  DCCLevel[%i]: enabled=%u, offset=%"PRIu64", "
 949                                 "fast_clear_size=%"PRIu64"\n",
 950                                 i, i < rtex->surface.num_dcc_levels,
 951                                 rtex->surface.u.legacy.level[i].dcc_offset,
 952                                 rtex->surface.u.legacy.level[i].dcc_fast_clear_size);
 953         }
 954
 955         for (i = 0; i <= rtex->resource.b.b.last_level; i++)
 956                 fprintf(f, "  Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
 957                         "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
 958                         "mode=%u, tiling_index = %u\n",
 959                         i, rtex->surface.u.legacy.level[i].offset,
 960                         rtex->surface.u.legacy.level[i].slice_size,
 961                         u_minify(rtex->resource.b.b.width0, i),
 962                         u_minify(rtex->resource.b.b.height0, i),
 963                         u_minify(rtex->resource.b.b.depth0, i),
 964                         rtex->surface.u.legacy.level[i].nblk_x,
 965                         rtex->surface.u.legacy.level[i].nblk_y,
 966                         rtex->surface.u.legacy.level[i].mode,
 967                         rtex->surface.u.legacy.tiling_index[i]);
 968
 969         if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
 970                 fprintf(f, "  StencilLayout: tilesplit=%u\n",
 971                         rtex->surface.u.legacy.stencil_tile_split);
 972                 for (i = 0; i <= rtex->resource.b.b.last_level; i++) {
 973                         fprintf(f, "  StencilLevel[%i]: offset=%"PRIu64", "
 974                                 "slice_size=%"PRIu64", npix_x=%u, "
 975                                 "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
 976                                 "mode=%u, tiling_index = %u\n",
 977                                 i, rtex->surface.u.legacy.stencil_level[i].offset,
 978                                 rtex->surface.u.legacy.stencil_level[i].slice_size,
 979                                 u_minify(rtex->resource.b.b.width0, i),
 980                                 u_minify(rtex->resource.b.b.height0, i),
 981                                 u_minify(rtex->resource.b.b.depth0, i),
 982                                 rtex->surface.u.legacy.stencil_level[i].nblk_x,
 983                                 rtex->surface.u.legacy.stencil_level[i].nblk_y,
 984                                 rtex->surface.u.legacy.stencil_level[i].mode,
 985                                 rtex->surface.u.legacy.stencil_tiling_index[i]);
 986                 }
 987         }
 988 }
 989
 990 /* Common processing for r600_texture_create and r600_texture_from_handle */
 991 static struct r600_texture *
 992 r600_texture_create_object(struct pipe_screen *screen,
 993                            const struct pipe_resource *base,
 994                            struct pb_buffer *buf,
 995                            struct radeon_surf *surface)
 996 {
 997         struct r600_texture *rtex;
 998         struct r600_resource *resource;
 999         struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
1000
1001         rtex = CALLOC_STRUCT(r600_texture);
1002         if (!rtex)
1003                 return NULL;
1004
1005         resource = &rtex->resource;
1006         resource->b.b = *base;
1007         resource->b.b.next = NULL;
1008         resource->b.vtbl = &r600_texture_vtbl;
1009         pipe_reference_init(&resource->b.b.reference, 1);
1010         resource->b.b.screen = screen;
1011
1012         /* don't include stencil-only formats which we don't support for rendering */
1013         rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
1014
1015         rtex->surface = *surface;
1016         rtex->size = rtex->surface.surf_size;
1017
1018         rtex->tc_compatible_htile = rtex->surface.htile_size != 0 &&
1019                                     (rtex->surface.flags &
1020                                      RADEON_SURF_TC_COMPATIBLE_HTILE);
1021
1022         /* TC-compatible HTILE:
1023          * - VI only supports Z32_FLOAT.
1024          * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
1025         if (rtex->tc_compatible_htile) {
1026                 if (rscreen->chip_class >= GFX9 &&
1027                     base->format == PIPE_FORMAT_Z16_UNORM)
1028                         rtex->db_render_format = base->format;
1029                 else
1030                         rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
1031         } else {
1032                 rtex->db_render_format = base->format;
1033         }
1034
1035         /* Tiled depth textures utilize the non-displayable tile order.
1036          * This must be done after r600_setup_surface.
1037          * Applies to R600-Cayman. */
1038         rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D;
1039         /* Applies to GCN. */
1040         rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
1041
1042         /* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers
1043          * between frames, so the only thing that can enable separate DCC
1044          * with DRI2 is multiple slow clears within a frame.
1045          */
1046         rtex->ps_draw_ratio = 0;
1047
1048         if (rtex->is_depth) {
1049                 if (base->flags & (R600_RESOURCE_FLAG_TRANSFER |
1050                                    R600_RESOURCE_FLAG_FLUSHED_DEPTH) ||
1051                     rscreen->chip_class >= EVERGREEN) {
1052                         rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
1053                         rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
1054                 } else {
1055                         if (rtex->resource.b.b.nr_samples <= 1 &&
1056                             (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
1057                              rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT))
1058                                 rtex->can_sample_z = true;
1059                 }
1060
1061                 if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
1062                                      R600_RESOURCE_FLAG_FLUSHED_DEPTH))) {
1063                         rtex->db_compatible = true;
1064
1065                         if (!(rscreen->debug_flags & DBG_NO_HYPERZ))
1066                                 r600_texture_allocate_htile(rscreen, rtex);
1067                 }
1068         } else {
1069                 if (base->nr_samples > 1) {
1070                         if (!buf) {
1071                                 r600_texture_allocate_fmask(rscreen, rtex);
1072                                 r600_texture_allocate_cmask(rscreen, rtex);
1073                                 rtex->cmask_buffer = &rtex->resource;
1074                         }
1075                         if (!rtex->fmask.size || !rtex->cmask.size) {
1076                                 FREE(rtex);
1077                                 return NULL;
1078                         }
1079                 }
1080
1081                 /* Shared textures must always set up DCC here.
1082                  * If it's not present, it will be disabled by
1083                  * apply_opaque_metadata later.
1084                  */
1085                 if (rtex->surface.dcc_size &&
1086                     (buf || !(rscreen->debug_flags & DBG_NO_DCC)) &&
1087                     !(rtex->surface.flags & RADEON_SURF_SCANOUT)) {
1088                         /* Reserve space for the DCC buffer. */
1089                         rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
1090                         rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
1091                 }
1092         }
1093
1094         /* Now create the backing buffer. */
1095         if (!buf) {
1096                 r600_init_resource_fields(rscreen, resource, rtex->size,
1097                                           rtex->surface.surf_alignment);
1098
1099                 resource->flags |= RADEON_FLAG_HANDLE;
1100
1101                 if (!r600_alloc_resource(rscreen, resource)) {
1102                         FREE(rtex);
1103                         return NULL;
1104                 }
1105         } else {
1106                 resource->buf = buf;
1107                 resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf);
1108                 resource->bo_size = buf->size;
1109                 resource->bo_alignment = buf->alignment;
1110                 resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf);
1111                 if (resource->domains & RADEON_DOMAIN_VRAM)
1112                         resource->vram_usage = buf->size;
1113                 else if (resource->domains & RADEON_DOMAIN_GTT)
1114                         resource->gart_usage = buf->size;
1115         }
1116
1117         if (rtex->cmask.size) {
1118                 /* Initialize the cmask to 0xCC (= compressed state). */
1119                 r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
1120                                          rtex->cmask.offset, rtex->cmask.size,
1121                                          0xCCCCCCCC);
1122         }
1123
1124         /* Initialize DCC only if the texture is not being imported. */
1125         if (!buf && rtex->dcc_offset) {
1126                 r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
1127                                          rtex->dcc_offset,
1128                                          rtex->surface.dcc_size,
1129                                          0xFFFFFFFF);
1130         }
1131
1132         /* Initialize the CMASK base register value. */
1133         rtex->cmask.base_address_reg =
1134                 (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
1135
1136         if (rscreen->debug_flags & DBG_VM) {
1137                 fprintf(stderr, "VM start=0x%"PRIX64"  end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
1138                         rtex->resource.gpu_address,
1139                         rtex->resource.gpu_address + rtex->resource.buf->size,
1140                         base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1,
1141                         base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
1142         }
1143
1144         if (rscreen->debug_flags & DBG_TEX) {
1145                 puts("Texture:");
1146                 r600_print_texture_info(rtex, stdout);
1147                 fflush(stdout);
1148         }
1149
1150         return rtex;
1151 }
1152
1153 static enum radeon_surf_mode
1154 r600_choose_tiling(struct r600_common_screen *rscreen,
1155                    const struct pipe_resource *templ)
1156 {
1157         const struct util_format_description *desc = util_format_description(templ->format);
1158         bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING;
1159
1160         /* MSAA resources must be 2D tiled. */
1161         if (templ->nr_samples > 1)
1162                 return RADEON_SURF_MODE_2D;
1163
1164         /* Transfer resources should be linear. */
1165         if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
1166                 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1167
1168         /* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
1169         if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
1170             (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
1171             (templ->target == PIPE_TEXTURE_2D ||
1172              templ->target == PIPE_TEXTURE_3D))
1173                 force_tiling = true;
1174
1175         /* Handle common candidates for the linear mode.
1176          * Compressed textures and DB surfaces must always be tiled.
1177          */
1178         if (!force_tiling && !util_format_is_compressed(templ->format) &&
1179             (!util_format_is_depth_or_stencil(templ->format) ||
1180              templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH)) {
1181                 if (rscreen->debug_flags & DBG_NO_TILING)
1182                         return RADEON_SURF_MODE_LINEAR_ALIGNED;
1183
1184                 /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
1185                 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
1186                         return RADEON_SURF_MODE_LINEAR_ALIGNED;
1187
1188                 /* Cursors are linear on SI.
1189                  * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
1190                 if (rscreen->chip_class >= SI &&
1191                     (templ->bind & PIPE_BIND_CURSOR))
1192                         return RADEON_SURF_MODE_LINEAR_ALIGNED;
1193
1194                 if (templ->bind & PIPE_BIND_LINEAR)
1195                         return RADEON_SURF_MODE_LINEAR_ALIGNED;
1196
1197                 /* Textures with a very small height are recommended to be linear. */
1198                 if (templ->target == PIPE_TEXTURE_1D ||
1199                     templ->target == PIPE_TEXTURE_1D_ARRAY ||
1200                     /* Only very thin and long 2D textures should benefit from
1201                      * linear_aligned. */
1202                     (templ->width0 > 8 && templ->height0 <= 2))
1203                         return RADEON_SURF_MODE_LINEAR_ALIGNED;
1204
1205                 /* Textures likely to be mapped often. */
1206                 if (templ->usage == PIPE_USAGE_STAGING ||
1207                     templ->usage == PIPE_USAGE_STREAM)
1208                         return RADEON_SURF_MODE_LINEAR_ALIGNED;
1209         }
1210
1211         /* Make small textures 1D tiled. */
1212         if (templ->width0 <= 16 || templ->height0 <= 16 ||
1213             (rscreen->debug_flags & DBG_NO_2D_TILING))
1214                 return RADEON_SURF_MODE_1D;
1215
1216         /* The allocator will switch to 1D if needed. */
1217         return RADEON_SURF_MODE_2D;
1218 }
1219
1220 struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
1221                                           const struct pipe_resource *templ)
1222 {
1223         struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
1224         struct radeon_surf surface = {0};
1225         bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
1226         bool tc_compatible_htile =
1227                 rscreen->chip_class >= VI &&
1228                 (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
1229                 !(rscreen->debug_flags & DBG_NO_HYPERZ) &&
1230                 !is_flushed_depth &&
1231                 templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
1232                 util_format_is_depth_or_stencil(templ->format);
1233
1234         int r;
1235
1236         r = r600_init_surface(rscreen, &surface, templ,
1237                               r600_choose_tiling(rscreen, templ), 0, 0,
1238                               false, false, is_flushed_depth,
1239                               tc_compatible_htile);
1240         if (r) {
1241                 return NULL;
1242         }
1243
1244         return (struct pipe_resource *)
1245                r600_texture_create_object(screen, templ, NULL, &surface);
1246 }
1247
1248 static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
1249                                                       const struct pipe_resource *templ,
1250                                                       struct winsys_handle *whandle,
1251                                                       unsigned usage)
1252 {
1253         struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
1254         struct pb_buffer *buf = NULL;
1255         unsigned stride = 0, offset = 0;
1256         unsigned array_mode;
1257         struct radeon_surf surface;
1258         int r;
1259         struct radeon_bo_metadata metadata = {};
1260         struct r600_texture *rtex;
1261
1262         /* Support only 2D textures without mipmaps */
1263         if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
1264               templ->depth0 != 1 || templ->last_level != 0)
1265                 return NULL;
1266
1267         buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset);
1268         if (!buf)
1269                 return NULL;
1270
1271         rscreen->ws->buffer_get_metadata(buf, &metadata);
1272
1273         if (rscreen->chip_class >= GFX9) {
1274         } else {
1275                 surface.u.legacy.pipe_config = metadata.u.legacy.pipe_config;
1276                 surface.u.legacy.bankw = metadata.u.legacy.bankw;
1277                 surface.u.legacy.bankh = metadata.u.legacy.bankh;
1278                 surface.u.legacy.tile_split = metadata.u.legacy.tile_split;
1279                 surface.u.legacy.mtilea = metadata.u.legacy.mtilea;
1280                 surface.u.legacy.num_banks = metadata.u.legacy.num_banks;
1281
1282                 if (metadata.u.legacy.macrotile == RADEON_LAYOUT_TILED)
1283                         array_mode = RADEON_SURF_MODE_2D;
1284                 else if (metadata.u.legacy.microtile == RADEON_LAYOUT_TILED)
1285                         array_mode = RADEON_SURF_MODE_1D;
1286                 else
1287                         array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
1288         }
1289
1290         r = r600_init_surface(rscreen, &surface, templ, array_mode, stride,
1291                               offset, true, metadata.u.legacy.scanout, false, false);
1292         if (r) {
1293                 return NULL;
1294         }
1295
1296         rtex = r600_texture_create_object(screen, templ, buf, &surface);
1297         if (!rtex)
1298                 return NULL;
1299
1300         rtex->resource.is_shared = true;
1301         rtex->resource.external_usage = usage;
1302
1303         if (rscreen->apply_opaque_metadata)
1304                 rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
1305
1306         return &rtex->resource.b.b;
1307 }
1308
1309 bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
1310                                      struct pipe_resource *texture,
1311                                      struct r600_texture **staging)
1312 {
1313         struct r600_texture *rtex = (struct r600_texture*)texture;
1314         struct pipe_resource resource;
1315         struct r600_texture **flushed_depth_texture = staging ?
1316                         staging : &rtex->flushed_depth_texture;
1317         enum pipe_format pipe_format = texture->format;
1318
1319         if (!staging) {
1320                 if (rtex->flushed_depth_texture)
1321                         return true; /* it's ready */
1322
1323                 if (!rtex->can_sample_z && rtex->can_sample_s) {
1324                         switch (pipe_format) {
1325                         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1326                                 /* Save memory by not allocating the S plane. */
1327                                 pipe_format = PIPE_FORMAT_Z32_FLOAT;
1328                                 break;
1329                         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1330                         case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1331                                 /* Save memory bandwidth by not copying the
1332                                  * stencil part during flush.
1333                                  *
1334                                  * This potentially increases memory bandwidth
1335                                  * if an application uses both Z and S texturing
1336                                  * simultaneously (a flushed Z24S8 texture
1337                                  * would be stored compactly), but how often
1338                                  * does that really happen?
1339                                  */
1340                                 pipe_format = PIPE_FORMAT_Z24X8_UNORM;
1341                                 break;
1342                         default:;
1343                         }
1344                 } else if (!rtex->can_sample_s && rtex->can_sample_z) {
1345                         assert(util_format_has_stencil(util_format_description(pipe_format)));
1346
1347                         /* DB->CB copies to an 8bpp surface don't work. */
1348                         pipe_format = PIPE_FORMAT_X24S8_UINT;
1349                 }
1350         }
1351
1352         memset(&resource, 0, sizeof(resource));
1353         resource.target = texture->target;
1354         resource.format = pipe_format;
1355         resource.width0 = texture->width0;
1356         resource.height0 = texture->height0;
1357         resource.depth0 = texture->depth0;
1358         resource.array_size = texture->array_size;
1359         resource.last_level = texture->last_level;
1360         resource.nr_samples = texture->nr_samples;
1361         resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
1362         resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
1363         resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;
1364
1365         if (staging)
1366                 resource.flags |= R600_RESOURCE_FLAG_TRANSFER;
1367
1368         *flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource);
1369         if (*flushed_depth_texture == NULL) {
1370                 R600_ERR("failed to create temporary texture to hold flushed depth\n");
1371                 return false;
1372         }
1373
1374         (*flushed_depth_texture)->non_disp_tiling = false;
1375         return true;
1376 }
1377
1378 /**
1379  * Initialize the pipe_resource descriptor to be of the same size as the box,
1380  * which is supposed to hold a subregion of the texture "orig" at the given
1381  * mipmap level.
1382  */
1383 static void r600_init_temp_resource_from_box(struct pipe_resource *res,
1384                                              struct pipe_resource *orig,
1385                                              const struct pipe_box *box,
1386                                              unsigned level, unsigned flags)
1387 {
1388         memset(res, 0, sizeof(*res));
1389         res->format = orig->format;
1390         res->width0 = box->width;
1391         res->height0 = box->height;
1392         res->depth0 = 1;
1393         res->array_size = 1;
1394         res->usage = flags & R600_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
1395         res->flags = flags;
1396
1397         /* We must set the correct texture target and dimensions for a 3D box. */
1398         if (box->depth > 1 && util_max_layer(orig, level) > 0) {
1399                 res->target = PIPE_TEXTURE_2D_ARRAY;
1400                 res->array_size = box->depth;
1401         } else {
1402                 res->target = PIPE_TEXTURE_2D;
1403         }
1404 }
1405
1406 static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
1407                                         struct r600_texture *rtex,
1408                                         unsigned transfer_usage,
1409                                         const struct pipe_box *box)
1410 {
1411         /* r600g doesn't react to dirty_tex_descriptor_counter */
1412         return rscreen->chip_class >= SI &&
1413                 !rtex->resource.is_shared &&
1414                 !(transfer_usage & PIPE_TRANSFER_READ) &&
1415                 rtex->resource.b.b.last_level == 0 &&
1416                 util_texrange_covers_whole_level(&rtex->resource.b.b, 0,
1417                                                  box->x, box->y, box->z,
1418                                                  box->width, box->height,
1419                                                  box->depth);
1420 }
1421
1422 static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
1423                                             struct r600_texture *rtex)
1424 {
1425         struct r600_common_screen *rscreen = rctx->screen;
1426
1427         /* There is no point in discarding depth and tiled buffers. */
1428         assert(!rtex->is_depth);
1429         assert(rtex->surface.is_linear);
1430
1431         /* Reallocate the buffer in the same pipe_resource. */
1432         r600_alloc_resource(rscreen, &rtex->resource);
1433
1434         /* Initialize the CMASK base address (needed even without CMASK). */
1435         rtex->cmask.base_address_reg =
1436                 (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
1437
1438         p_atomic_inc(&rscreen->dirty_tex_counter);
1439
1440         rctx->num_alloc_tex_transfer_bytes += rtex->size;
1441 }
1442
1443 static void *r600_texture_transfer_map(struct pipe_context *ctx,
1444                                        struct pipe_resource *texture,
1445                                        unsigned level,
1446                                        unsigned usage,
1447                                        const struct pipe_box *box,
1448                                        struct pipe_transfer **ptransfer)
1449 {
1450         struct r600_common_context *rctx = (struct r600_common_context*)ctx;
1451         struct r600_texture *rtex = (struct r600_texture*)texture;
1452         struct r600_transfer *trans;
1453         struct r600_resource *buf;
1454         unsigned offset = 0;
1455         char *map;
1456         bool use_staging_texture = false;
1457
1458         assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER));
1459         assert(box->width && box->height && box->depth);
1460
1461         /* Depth textures use staging unconditionally. */
1462         if (!rtex->is_depth) {
1463                 /* Degrade the tile mode if we get too many transfers on APUs.
1464                  * On dGPUs, the staging texture is always faster.
1465                  * Only count uploads that are at least 4x4 pixels large.
1466                  */
1467                 if (!rctx->screen->info.has_dedicated_vram &&
1468                     level == 0 &&
1469                     box->width >= 4 && box->height >= 4 &&
1470                     p_atomic_inc_return(&rtex->num_level0_transfers) == 10) {
1471                         bool can_invalidate =
1472                                 r600_can_invalidate_texture(rctx->screen, rtex,
1473                                                             usage, box);
1474
1475                         r600_degrade_tile_mode_to_linear(rctx, rtex,
1476                                                          can_invalidate);
1477                 }
1478
1479                 /* Tiled textures need to be converted into a linear texture for CPU
1480                  * access. The staging texture is always linear and is placed in GART.
1481                  *
1482                  * Reading from VRAM or GTT WC is slow, always use the staging
1483                  * texture in this case.
1484                  *
1485                  * Use the staging texture for uploads if the underlying BO
1486                  * is busy.
1487                  */
1488                 if (!rtex->surface.is_linear)
1489                         use_staging_texture = true;
1490                 else if (usage & PIPE_TRANSFER_READ)
1491                         use_staging_texture =
1492                                 rtex->resource.domains & RADEON_DOMAIN_VRAM ||
1493                                 rtex->resource.flags & RADEON_FLAG_GTT_WC;
1494                 /* Write & linear only: */
1495                 else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf,
1496                                                          RADEON_USAGE_READWRITE) ||
1497                          !rctx->ws->buffer_wait(rtex->resource.buf, 0,
1498                                                 RADEON_USAGE_READWRITE)) {
1499                         /* It's busy. */
1500                         if (r600_can_invalidate_texture(rctx->screen, rtex,
1501                                                         usage, box))
1502                                 r600_texture_invalidate_storage(rctx, rtex);
1503                         else
1504                                 use_staging_texture = true;
1505                 }
1506         }
1507
1508         trans = CALLOC_STRUCT(r600_transfer);
1509         if (!trans)
1510                 return NULL;
1511         pipe_resource_reference(&trans->transfer.resource, texture);
1512         trans->transfer.level = level;
1513         trans->transfer.usage = usage;
1514         trans->transfer.box = *box;
1515
1516         if (rtex->is_depth) {
1517                 struct r600_texture *staging_depth;
1518
1519                 if (rtex->resource.b.b.nr_samples > 1) {
1520                         /* MSAA depth buffers need to be converted to single sample buffers.
1521                          *
1522                          * Mapping MSAA depth buffers can occur if ReadPixels is called
1523                          * with a multisample GLX visual.
1524                          *
1525                          * First downsample the depth buffer to a temporary texture,
1526                          * then decompress the temporary one to staging.
1527                          *
1528                          * Only the region being mapped is transfered.
1529                          */
1530                         struct pipe_resource resource;
1531
1532                         r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
1533
1534                         if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
1535                                 R600_ERR("failed to create temporary texture to hold untiled copy\n");
1536                                 FREE(trans);
1537                                 return NULL;
1538                         }
1539
1540                         if (usage & PIPE_TRANSFER_READ) {
1541                                 struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
1542                                 if (!temp) {
1543                                         R600_ERR("failed to create a temporary depth texture\n");
1544                                         FREE(trans);
1545                                         return NULL;
1546                                 }
1547
1548                                 r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
1549                                 rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
1550                                                             0, 0, 0, box->depth, 0, 0);
1551                                 pipe_resource_reference(&temp, NULL);
1552                         }
1553                 }
1554                 else {
1555                         /* XXX: only readback the rectangle which is being mapped? */
1556                         /* XXX: when discard is true, no need to read back from depth texture */
1557                         if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
1558                                 R600_ERR("failed to create temporary texture to hold untiled copy\n");
1559                                 FREE(trans);
1560                                 return NULL;
1561                         }
1562
1563                         rctx->blit_decompress_depth(ctx, rtex, staging_depth,
1564                                                     level, level,
1565                                                     box->z, box->z + box->depth - 1,
1566                                                     0, 0);
1567
1568                         offset = r600_texture_get_offset(staging_depth, level, box);
1569                 }
1570
1571                 trans->transfer.stride = staging_depth->surface.u.legacy.level[level].nblk_x *
1572                                          staging_depth->surface.bpe;
1573                 trans->transfer.layer_stride = staging_depth->surface.u.legacy.level[level].slice_size;
1574                 trans->staging = (struct r600_resource*)staging_depth;
1575                 buf = trans->staging;
1576         } else if (use_staging_texture) {
1577                 struct pipe_resource resource;
1578                 struct r600_texture *staging;
1579
1580                 r600_init_temp_resource_from_box(&resource, texture, box, level,
1581                                                  R600_RESOURCE_FLAG_TRANSFER);
1582                 resource.usage = (usage & PIPE_TRANSFER_READ) ?
1583                         PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
1584
1585                 /* Create the temporary texture. */
1586                 staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
1587                 if (!staging) {
1588                         R600_ERR("failed to create temporary texture to hold untiled copy\n");
1589                         FREE(trans);
1590                         return NULL;
1591                 }
1592                 trans->staging = &staging->resource;
1593                 trans->transfer.stride = staging->surface.u.legacy.level[0].nblk_x *
1594                                          staging->surface.bpe;
1595                 trans->transfer.layer_stride = staging->surface.u.legacy.level[0].slice_size;
1596
1597                 if (usage & PIPE_TRANSFER_READ)
1598                         r600_copy_to_staging_texture(ctx, trans);
1599                 else
1600                         usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
1601
1602                 buf = trans->staging;
1603         } else {
1604                 /* the resource is mapped directly */
1605                 trans->transfer.stride = rtex->surface.u.legacy.level[level].nblk_x *
1606                                          rtex->surface.bpe;
1607                 trans->transfer.layer_stride = rtex->surface.u.legacy.level[level].slice_size;
1608                 offset = r600_texture_get_offset(rtex, level, box);
1609                 buf = &rtex->resource;
1610         }
1611
1612         if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
1613                 r600_resource_reference(&trans->staging, NULL);
1614                 FREE(trans);
1615                 return NULL;
1616         }
1617
1618         *ptransfer = &trans->transfer;
1619         return map + offset;
1620 }
1621
1622 static void r600_texture_transfer_unmap(struct pipe_context *ctx,
1623                                         struct pipe_transfer* transfer)
1624 {
1625         struct r600_common_context *rctx = (struct r600_common_context*)ctx;
1626         struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
1627         struct pipe_resource *texture = transfer->resource;
1628         struct r600_texture *rtex = (struct r600_texture*)texture;
1629
1630         if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
1631                 if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) {
1632                         ctx->resource_copy_region(ctx, texture, transfer->level,
1633                                                   transfer->box.x, transfer->box.y, transfer->box.z,
1634                                                   &rtransfer->staging->b.b, transfer->level,
1635                                                   &transfer->box);
1636                 } else {
1637                         r600_copy_from_staging_texture(ctx, rtransfer);
1638                 }
1639         }
1640
1641         if (rtransfer->staging) {
1642                 rctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size;
1643                 r600_resource_reference(&rtransfer->staging, NULL);
1644         }
1645
1646         /* Heuristic for {upload, draw, upload, draw, ..}:
1647          *
1648          * Flush the gfx IB if we've allocated too much texture storage.
1649          *
1650          * The idea is that we don't want to build IBs that use too much
1651          * memory and put pressure on the kernel memory manager and we also
1652          * want to make temporary and invalidated buffers go idle ASAP to
1653          * decrease the total memory usage or make them reusable. The memory
1654          * usage will be slightly higher than given here because of the buffer
1655          * cache in the winsys.
1656          *
1657          * The result is that the kernel memory manager is never a bottleneck.
1658          */
1659         if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.gart_size / 4) {
1660                 rctx->gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
1661                 rctx->num_alloc_tex_transfer_bytes = 0;
1662         }
1663
1664         pipe_resource_reference(&transfer->resource, NULL);
1665         FREE(transfer);
1666 }
1667
/* Resource vtable for textures. The entries are positional, in the order
 * given by the trailing comments. No get_handle hook is provided, and
 * flushing of mapped regions uses the u_default_transfer_flush_region helper.
 */
static const struct u_resource_vtbl r600_texture_vtbl =
{
        NULL,                           /* get_handle */
        r600_texture_destroy,           /* resource_destroy */
        r600_texture_transfer_map,      /* transfer_map */
        u_default_transfer_flush_region, /* transfer_flush_region */
        r600_texture_transfer_unmap,    /* transfer_unmap */
};
1676
/* DCC channel type categories within which formats can be reinterpreted
 * while keeping the same DCC encoding. The swizzle must also match.
 *
 * The category of a format is derived by vi_get_dcc_channel_type from the
 * size and type of its first non-void channel. */
enum dcc_channel_type {
        dcc_channel_float32,            /* 32-bit float channel */
        dcc_channel_uint32,             /* 32-bit unsigned channel */
        dcc_channel_sint32,             /* any other 32-bit channel */
        dcc_channel_float16,            /* 16-bit float channel */
        dcc_channel_uint16,             /* 16-bit unsigned channel */
        dcc_channel_sint16,             /* any other 16-bit channel */
        dcc_channel_uint_10_10_10_2,    /* 10-bit first channel */
        dcc_channel_uint8,              /* 8-bit unsigned channel */
        dcc_channel_sint8,              /* any other 8-bit channel */
        dcc_channel_incompatible,       /* no non-void channel or another size */
};
1691
1692 /* Return the type of DCC encoding. */
1693 static enum dcc_channel_type
1694 vi_get_dcc_channel_type(const struct util_format_description *desc)
1695 {
1696         int i;
1697
1698         /* Find the first non-void channel. */
1699         for (i = 0; i < desc->nr_channels; i++)
1700                 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
1701                         break;
1702         if (i == desc->nr_channels)
1703                 return dcc_channel_incompatible;
1704
1705         switch (desc->channel[i].size) {
1706         case 32:
1707                 if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
1708                         return dcc_channel_float32;
1709                 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
1710                         return dcc_channel_uint32;
1711                 return dcc_channel_sint32;
1712         case 16:
1713                 if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
1714                         return dcc_channel_float16;
1715                 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
1716                         return dcc_channel_uint16;
1717                 return dcc_channel_sint16;
1718         case 10:
1719                 return dcc_channel_uint_10_10_10_2;
1720         case 8:
1721                 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
1722                         return dcc_channel_uint8;
1723                 return dcc_channel_sint8;
1724         default:
1725                 return dcc_channel_incompatible;
1726         }
1727 }
1728
1729 /* Return if it's allowed to reinterpret one format as another with DCC enabled. */
1730 bool vi_dcc_formats_compatible(enum pipe_format format1,
1731                                enum pipe_format format2)
1732 {
1733         const struct util_format_description *desc1, *desc2;
1734         enum dcc_channel_type type1, type2;
1735         int i;
1736
1737         if (format1 == format2)
1738                 return true;
1739
1740         desc1 = util_format_description(format1);
1741         desc2 = util_format_description(format2);
1742
1743         if (desc1->nr_channels != desc2->nr_channels)
1744                 return false;
1745
1746         /* Swizzles must be the same. */
1747         for (i = 0; i < desc1->nr_channels; i++)
1748                 if (desc1->swizzle[i] <= PIPE_SWIZZLE_W &&
1749                     desc2->swizzle[i] <= PIPE_SWIZZLE_W &&
1750                     desc1->swizzle[i] != desc2->swizzle[i])
1751                         return false;
1752
1753         type1 = vi_get_dcc_channel_type(desc1);
1754         type2 = vi_get_dcc_channel_type(desc2);
1755
1756         return type1 != dcc_channel_incompatible &&
1757                type2 != dcc_channel_incompatible &&
1758                type1 == type2;
1759 }
1760
1761 void vi_dcc_disable_if_incompatible_format(struct r600_common_context *rctx,
1762                                            struct pipe_resource *tex,
1763                                            unsigned level,
1764                                            enum pipe_format view_format)
1765 {
1766         struct r600_texture *rtex = (struct r600_texture *)tex;
1767
1768         if (rtex->dcc_offset &&
1769             level < rtex->surface.num_dcc_levels &&
1770             !vi_dcc_formats_compatible(tex->format, view_format))
1771                 if (!r600_texture_disable_dcc(rctx, (struct r600_texture*)tex))
1772                         rctx->decompress_dcc(&rctx->b, rtex);
1773 }
1774
1775 struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
1776                                                 struct pipe_resource *texture,
1777                                                 const struct pipe_surface *templ,
1778                                                 unsigned width, unsigned height)
1779 {
1780         struct r600_common_context *rctx = (struct r600_common_context*)pipe;
1781         struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
1782
1783         if (!surface)
1784                 return NULL;
1785
1786         assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level));
1787         assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
1788
1789         pipe_reference_init(&surface->base.reference, 1);
1790         pipe_resource_reference(&surface->base.texture, texture);
1791         surface->base.context = pipe;
1792         surface->base.format = templ->format;
1793         surface->base.width = width;
1794         surface->base.height = height;
1795         surface->base.u = templ->u;
1796
1797         if (texture->target != PIPE_BUFFER)
1798                 vi_dcc_disable_if_incompatible_format(rctx, texture,
1799                                                       templ->u.tex.level,
1800                                                       templ->format);
1801
1802         return &surface->base;
1803 }
1804
1805 static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
1806                                                 struct pipe_resource *tex,
1807                                                 const struct pipe_surface *templ)
1808 {
1809         unsigned level = templ->u.tex.level;
1810         unsigned width = u_minify(tex->width0, level);
1811         unsigned height = u_minify(tex->height0, level);
1812
1813         if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
1814                 const struct util_format_description *tex_desc
1815                         = util_format_description(tex->format);
1816                 const struct util_format_description *templ_desc
1817                         = util_format_description(templ->format);
1818
1819                 assert(tex_desc->block.bits == templ_desc->block.bits);
1820
1821                 /* Adjust size of surface if and only if the block width or
1822                  * height is changed. */
1823                 if (tex_desc->block.width != templ_desc->block.width ||
1824                     tex_desc->block.height != templ_desc->block.height) {
1825                         unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
1826                         unsigned nblks_y = util_format_get_nblocksy(tex->format, height);
1827
1828                         width = nblks_x * templ_desc->block.width;
1829                         height = nblks_y * templ_desc->block.height;
1830                 }
1831         }
1832
1833         return r600_create_surface_custom(pipe, tex, templ, width, height);
1834 }
1835
1836 static void r600_surface_destroy(struct pipe_context *pipe,
1837                                  struct pipe_surface *surface)
1838 {
1839         struct r600_surface *surf = (struct r600_surface*)surface;
1840         r600_resource_reference(&surf->cb_buffer_fmask, NULL);
1841         r600_resource_reference(&surf->cb_buffer_cmask, NULL);
1842         pipe_resource_reference(&surface->texture, NULL);
1843         FREE(surface);
1844 }
1845
1846 static void r600_clear_texture(struct pipe_context *pipe,
1847                                struct pipe_resource *tex,
1848                                unsigned level,
1849                                const struct pipe_box *box,
1850                                const void *data)
1851 {
1852         struct pipe_screen *screen = pipe->screen;
1853         struct r600_texture *rtex = (struct r600_texture*)tex;
1854         struct pipe_surface tmpl = {{0}};
1855         struct pipe_surface *sf;
1856         const struct util_format_description *desc =
1857                 util_format_description(tex->format);
1858
1859         tmpl.format = tex->format;
1860         tmpl.u.tex.first_layer = box->z;
1861         tmpl.u.tex.last_layer = box->z + box->depth - 1;
1862         tmpl.u.tex.level = level;
1863         sf = pipe->create_surface(pipe, tex, &tmpl);
1864         if (!sf)
1865                 return;
1866
1867         if (rtex->is_depth) {
1868                 unsigned clear;
1869                 float depth;
1870                 uint8_t stencil = 0;
1871
1872                 /* Depth is always present. */
1873                 clear = PIPE_CLEAR_DEPTH;
1874                 desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
1875
1876                 if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
1877                         clear |= PIPE_CLEAR_STENCIL;
1878                         desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
1879                 }
1880
1881                 pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil,
1882                                           box->x, box->y,
1883                                           box->width, box->height, false);
1884         } else {
1885                 union pipe_color_union color;
1886
1887                 /* pipe_color_union requires the full vec4 representation. */
1888                 if (util_format_is_pure_uint(tex->format))
1889                         desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
1890                 else if (util_format_is_pure_sint(tex->format))
1891                         desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
1892                 else
1893                         desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
1894
1895                 if (screen->is_format_supported(screen, tex->format,
1896                                                 tex->target, 0,
1897                                                 PIPE_BIND_RENDER_TARGET)) {
1898                         pipe->clear_render_target(pipe, sf, &color,
1899                                                   box->x, box->y,
1900                                                   box->width, box->height, false);
1901                 } else {
1902                         /* Software fallback - just for R9G9B9E5_FLOAT */
1903                         util_clear_render_target(pipe, sf, &color,
1904                                                  box->x, box->y,
1905                                                  box->width, box->height);
1906                 }
1907         }
1908         pipe_surface_reference(&sf, NULL);
1909 }
1910
1911 unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap)
1912 {
1913         const struct util_format_description *desc = util_format_description(format);
1914
1915 #define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
1916
1917         if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
1918                 return V_0280A0_SWAP_STD;
1919
1920         if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
1921                 return ~0U;
1922
1923         switch (desc->nr_channels) {
1924         case 1:
1925                 if (HAS_SWIZZLE(0,X))
1926                         return V_0280A0_SWAP_STD; /* X___ */
1927                 else if (HAS_SWIZZLE(3,X))
1928                         return V_0280A0_SWAP_ALT_REV; /* ___X */
1929                 break;
1930         case 2:
1931                 if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
1932                     (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
1933                     (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
1934                         return V_0280A0_SWAP_STD; /* XY__ */
1935                 else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
1936                          (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
1937                          (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
1938                         /* YX__ */
1939                         return (do_endian_swap ? V_0280A0_SWAP_STD : V_0280A0_SWAP_STD_REV);
1940                 else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
1941                         return V_0280A0_SWAP_ALT; /* X__Y */
1942                 else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
1943                         return V_0280A0_SWAP_ALT_REV; /* Y__X */
1944                 break;
1945         case 3:
1946                 if (HAS_SWIZZLE(0,X))
1947                         return (do_endian_swap ? V_0280A0_SWAP_STD_REV : V_0280A0_SWAP_STD);
1948                 else if (HAS_SWIZZLE(0,Z))
1949                         return V_0280A0_SWAP_STD_REV; /* ZYX */
1950                 break;
1951         case 4:
1952                 /* check the middle channels, the 1st and 4th channel can be NONE */
1953                 if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) {
1954                         return V_0280A0_SWAP_STD; /* XYZW */
1955                 } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) {
1956                         return V_0280A0_SWAP_STD_REV; /* WZYX */
1957                 } else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) {
1958                         return V_0280A0_SWAP_ALT; /* ZYXW */
1959                 } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) {
1960                         /* YZWX */
1961                         if (desc->is_array)
1962                                 return V_0280A0_SWAP_ALT_REV;
1963                         else
1964                                 return (do_endian_swap ? V_0280A0_SWAP_ALT : V_0280A0_SWAP_ALT_REV);
1965                 }
1966                 break;
1967         }
1968         return ~0U;
1969 }
1970
1971 /* PIPELINE_STAT-BASED DCC ENABLEMENT FOR DISPLAYABLE SURFACES */
1972
1973 static void vi_dcc_clean_up_context_slot(struct r600_common_context *rctx,
1974                                          int slot)
1975 {
1976         int i;
1977
1978         if (rctx->dcc_stats[slot].query_active)
1979                 vi_separate_dcc_stop_query(&rctx->b,
1980                                            rctx->dcc_stats[slot].tex);
1981
1982         for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats[slot].ps_stats); i++)
1983                 if (rctx->dcc_stats[slot].ps_stats[i]) {
1984                         rctx->b.destroy_query(&rctx->b,
1985                                               rctx->dcc_stats[slot].ps_stats[i]);
1986                         rctx->dcc_stats[slot].ps_stats[i] = NULL;
1987                 }
1988
1989         r600_texture_reference(&rctx->dcc_stats[slot].tex, NULL);
1990 }
1991
1992 /**
1993  * Return the per-context slot where DCC statistics queries for the texture live.
1994  */
1995 static unsigned vi_get_context_dcc_stats_index(struct r600_common_context *rctx,
1996                                                struct r600_texture *tex)
1997 {
1998         int i, empty_slot = -1;
1999
2000         /* Remove zombie textures (textures kept alive by this array only). */
2001         for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++)
2002                 if (rctx->dcc_stats[i].tex &&
2003                     rctx->dcc_stats[i].tex->resource.b.b.reference.count == 1)
2004                         vi_dcc_clean_up_context_slot(rctx, i);
2005
2006         /* Find the texture. */
2007         for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
2008                 /* Return if found. */
2009                 if (rctx->dcc_stats[i].tex == tex) {
2010                         rctx->dcc_stats[i].last_use_timestamp = os_time_get();
2011                         return i;
2012                 }
2013
2014                 /* Record the first seen empty slot. */
2015                 if (empty_slot == -1 && !rctx->dcc_stats[i].tex)
2016                         empty_slot = i;
2017         }
2018
2019         /* Not found. Remove the oldest member to make space in the array. */
2020         if (empty_slot == -1) {
2021                 int oldest_slot = 0;
2022
2023                 /* Find the oldest slot. */
2024                 for (i = 1; i < ARRAY_SIZE(rctx->dcc_stats); i++)
2025                         if (rctx->dcc_stats[oldest_slot].last_use_timestamp >
2026                             rctx->dcc_stats[i].last_use_timestamp)
2027                                 oldest_slot = i;
2028
2029                 /* Clean up the oldest slot. */
2030                 vi_dcc_clean_up_context_slot(rctx, oldest_slot);
2031                 empty_slot = oldest_slot;
2032         }
2033
2034         /* Add the texture to the new slot. */
2035         r600_texture_reference(&rctx->dcc_stats[empty_slot].tex, tex);
2036         rctx->dcc_stats[empty_slot].last_use_timestamp = os_time_get();
2037         return empty_slot;
2038 }
2039
2040 static struct pipe_query *
2041 vi_create_resuming_pipestats_query(struct pipe_context *ctx)
2042 {
2043         struct r600_query_hw *query = (struct r600_query_hw*)
2044                 ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
2045
2046         query->flags |= R600_QUERY_HW_FLAG_BEGIN_RESUMES;
2047         return (struct pipe_query*)query;
2048 }
2049
2050 /**
2051  * Called when binding a color buffer.
2052  */
2053 void vi_separate_dcc_start_query(struct pipe_context *ctx,
2054                                  struct r600_texture *tex)
2055 {
2056         struct r600_common_context *rctx = (struct r600_common_context*)ctx;
2057         unsigned i = vi_get_context_dcc_stats_index(rctx, tex);
2058
2059         assert(!rctx->dcc_stats[i].query_active);
2060
2061         if (!rctx->dcc_stats[i].ps_stats[0])
2062                 rctx->dcc_stats[i].ps_stats[0] = vi_create_resuming_pipestats_query(ctx);
2063
2064         /* begin or resume the query */
2065         ctx->begin_query(ctx, rctx->dcc_stats[i].ps_stats[0]);
2066         rctx->dcc_stats[i].query_active = true;
2067 }
2068
2069 /**
2070  * Called when unbinding a color buffer.
2071  */
2072 void vi_separate_dcc_stop_query(struct pipe_context *ctx,
2073                                 struct r600_texture *tex)
2074 {
2075         struct r600_common_context *rctx = (struct r600_common_context*)ctx;
2076         unsigned i = vi_get_context_dcc_stats_index(rctx, tex);
2077
2078         assert(rctx->dcc_stats[i].query_active);
2079         assert(rctx->dcc_stats[i].ps_stats[0]);
2080
2081         /* pause or end the query */
2082         ctx->end_query(ctx, rctx->dcc_stats[i].ps_stats[0]);
2083         rctx->dcc_stats[i].query_active = false;
2084 }
2085
2086 static bool vi_should_enable_separate_dcc(struct r600_texture *tex)
2087 {
2088         /* The minimum number of fullscreen draws per frame that is required
2089          * to enable DCC. */
2090         return tex->ps_draw_ratio + tex->num_slow_clears >= 5;
2091 }
2092
2093 /* Called by fast clear. */
2094 static void vi_separate_dcc_try_enable(struct r600_common_context *rctx,
2095                                        struct r600_texture *tex)
2096 {
2097         /* The intent is to use this with shared displayable back buffers,
2098          * but it's not strictly limited only to them.
2099          */
2100         if (!tex->resource.is_shared ||
2101             !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) ||
2102             tex->resource.b.b.target != PIPE_TEXTURE_2D ||
2103             tex->resource.b.b.last_level > 0 ||
2104             !tex->surface.dcc_size)
2105                 return;
2106
2107         if (tex->dcc_offset)
2108                 return; /* already enabled */
2109
2110         /* Enable the DCC stat gathering. */
2111         if (!tex->dcc_gather_statistics) {
2112                 tex->dcc_gather_statistics = true;
2113                 vi_separate_dcc_start_query(&rctx->b, tex);
2114         }
2115
2116         if (!vi_should_enable_separate_dcc(tex))
2117                 return; /* stats show that DCC decompression is too expensive */
2118
2119         assert(tex->surface.num_dcc_levels);
2120         assert(!tex->dcc_separate_buffer);
2121
2122         r600_texture_discard_cmask(rctx->screen, tex);
2123
2124         /* Get a DCC buffer. */
2125         if (tex->last_dcc_separate_buffer) {
2126                 assert(tex->dcc_gather_statistics);
2127                 assert(!tex->dcc_separate_buffer);
2128                 tex->dcc_separate_buffer = tex->last_dcc_separate_buffer;
2129                 tex->last_dcc_separate_buffer = NULL;
2130         } else {
2131                 tex->dcc_separate_buffer = (struct r600_resource*)
2132                         r600_aligned_buffer_create(rctx->b.screen,
2133                                                    R600_RESOURCE_FLAG_UNMAPPABLE,
2134                                                    PIPE_USAGE_DEFAULT,
2135                                                    tex->surface.dcc_size,
2136                                                    tex->surface.dcc_alignment);
2137                 if (!tex->dcc_separate_buffer)
2138                         return;
2139         }
2140
2141         /* dcc_offset is the absolute GPUVM address. */
2142         tex->dcc_offset = tex->dcc_separate_buffer->gpu_address;
2143
2144         /* no need to flag anything since this is called by fast clear that
2145          * flags framebuffer state
2146          */
2147 }
2148
2149 /**
2150  * Called by pipe_context::flush_resource, the place where DCC decompression
2151  * takes place.
2152  */
2153 void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
2154                                              struct r600_texture *tex)
2155 {
2156         struct r600_common_context *rctx = (struct r600_common_context*)ctx;
2157         struct pipe_query *tmp;
2158         unsigned i = vi_get_context_dcc_stats_index(rctx, tex);
2159         bool query_active = rctx->dcc_stats[i].query_active;
2160         bool disable = false;
2161
2162         if (rctx->dcc_stats[i].ps_stats[2]) {
2163                 union pipe_query_result result;
2164
2165                 /* Read the results. */
2166                 ctx->get_query_result(ctx, rctx->dcc_stats[i].ps_stats[2],
2167                                       true, &result);
2168                 r600_query_hw_reset_buffers(rctx,
2169                                             (struct r600_query_hw*)
2170                                             rctx->dcc_stats[i].ps_stats[2]);
2171
2172                 /* Compute the approximate number of fullscreen draws. */
2173                 tex->ps_draw_ratio =
2174                         result.pipeline_statistics.ps_invocations /
2175                         (tex->resource.b.b.width0 * tex->resource.b.b.height0);
2176                 rctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio;
2177
2178                 disable = tex->dcc_separate_buffer &&
2179                           !vi_should_enable_separate_dcc(tex);
2180         }
2181
2182         tex->num_slow_clears = 0;
2183
2184         /* stop the statistics query for ps_stats[0] */
2185         if (query_active)
2186                 vi_separate_dcc_stop_query(ctx, tex);
2187
2188         /* Move the queries in the queue by one. */
2189         tmp = rctx->dcc_stats[i].ps_stats[2];
2190         rctx->dcc_stats[i].ps_stats[2] = rctx->dcc_stats[i].ps_stats[1];
2191         rctx->dcc_stats[i].ps_stats[1] = rctx->dcc_stats[i].ps_stats[0];
2192         rctx->dcc_stats[i].ps_stats[0] = tmp;
2193
2194         /* create and start a new query as ps_stats[0] */
2195         if (query_active)
2196                 vi_separate_dcc_start_query(ctx, tex);
2197
2198         if (disable) {
2199                 assert(!tex->last_dcc_separate_buffer);
2200                 tex->last_dcc_separate_buffer = tex->dcc_separate_buffer;
2201                 tex->dcc_separate_buffer = NULL;
2202                 tex->dcc_offset = 0;
2203                 /* no need to flag anything since this is called after
2204                  * decompression that re-sets framebuffer state
2205                  */
2206         }
2207 }
2208
2209 /* FAST COLOR CLEAR */
2210
2211 static void evergreen_set_clear_color(struct r600_texture *rtex,
2212                                       enum pipe_format surface_format,
2213                                       const union pipe_color_union *color)
2214 {
2215         union util_color uc;
2216
2217         memset(&uc, 0, sizeof(uc));
2218
2219         if (rtex->surface.bpe == 16) {
2220                 /* DCC fast clear only:
2221                  *   CLEAR_WORD0 = R = G = B
2222                  *   CLEAR_WORD1 = A
2223                  */
2224                 assert(color->ui[0] == color->ui[1] &&
2225                        color->ui[0] == color->ui[2]);
2226                 uc.ui[0] = color->ui[0];
2227                 uc.ui[1] = color->ui[3];
2228         } else if (util_format_is_pure_uint(surface_format)) {
2229                 util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
2230         } else if (util_format_is_pure_sint(surface_format)) {
2231                 util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
2232         } else {
2233                 util_pack_color(color->f, surface_format, &uc);
2234         }
2235
2236         memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
2237 }
2238
2239 static bool vi_get_fast_clear_parameters(enum pipe_format surface_format,
2240                                          const union pipe_color_union *color,
2241                                          uint32_t* reset_value,
2242                                          bool* clear_words_needed)
2243 {
2244         bool values[4] = {};
2245         int i;
2246         bool main_value = false;
2247         bool extra_value = false;
2248         int extra_channel;
2249         const struct util_format_description *desc = util_format_description(surface_format);
2250
2251         if (desc->block.bits == 128 &&
2252             (color->ui[0] != color->ui[1] ||
2253              color->ui[0] != color->ui[2]))
2254                 return false;
2255
2256         *clear_words_needed = true;
2257         *reset_value = 0x20202020U;
2258
2259         /* If we want to clear without needing a fast clear eliminate step, we
2260          * can set each channel to 0 or 1 (or 0/max for integer formats). We
2261          * have two sets of flags, one for the last or first channel(extra) and
2262          * one for the other channels(main).
2263          */
2264
2265         if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
2266             surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
2267             surface_format == PIPE_FORMAT_B5G6R5_SRGB) {
2268                 extra_channel = -1;
2269         } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
2270                 if(r600_translate_colorswap(surface_format, false) <= 1)
2271                         extra_channel = desc->nr_channels - 1;
2272                 else
2273                         extra_channel = 0;
2274         } else
2275                 return true;
2276
2277         for (i = 0; i < 4; ++i) {
2278                 int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
2279
2280                 if (desc->swizzle[i] < PIPE_SWIZZLE_X ||
2281                     desc->swizzle[i] > PIPE_SWIZZLE_W)
2282                         continue;
2283
2284                 if (desc->channel[i].pure_integer &&
2285                     desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2286                         /* Use the maximum value for clamping the clear color. */
2287                         int max = u_bit_consecutive(0, desc->channel[i].size - 1);
2288
2289                         values[i] = color->i[i] != 0;
2290                         if (color->i[i] != 0 && MIN2(color->i[i], max) != max)
2291                                 return true;
2292                 } else if (desc->channel[i].pure_integer &&
2293                            desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2294                         /* Use the maximum value for clamping the clear color. */
2295                         unsigned max = u_bit_consecutive(0, desc->channel[i].size);
2296
2297                         values[i] = color->ui[i] != 0U;
2298                         if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max)
2299                                 return true;
2300                 } else {
2301                         values[i] = color->f[i] != 0.0F;
2302                         if (color->f[i] != 0.0F && color->f[i] != 1.0F)
2303                                 return true;
2304                 }
2305
2306                 if (index == extra_channel)
2307                         extra_value = values[i];
2308                 else
2309                         main_value = values[i];
2310         }
2311
2312         for (int i = 0; i < 4; ++i)
2313                 if (values[i] != main_value &&
2314                     desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
2315                     desc->swizzle[i] >= PIPE_SWIZZLE_X &&
2316                     desc->swizzle[i] <= PIPE_SWIZZLE_W)
2317                         return true;
2318
2319         *clear_words_needed = false;
2320         if (main_value)
2321                 *reset_value |= 0x80808080U;
2322
2323         if (extra_value)
2324                 *reset_value |= 0x40404040U;
2325         return true;
2326 }
2327
2328 void vi_dcc_clear_level(struct r600_common_context *rctx,
2329                         struct r600_texture *rtex,
2330                         unsigned level, unsigned clear_value)
2331 {
2332         struct pipe_resource *dcc_buffer;
2333         uint64_t dcc_offset;
2334
2335         assert(rtex->dcc_offset && level < rtex->surface.num_dcc_levels);
2336
2337         if (rtex->dcc_separate_buffer) {
2338                 dcc_buffer = &rtex->dcc_separate_buffer->b.b;
2339                 dcc_offset = 0;
2340         } else {
2341                 dcc_buffer = &rtex->resource.b.b;
2342                 dcc_offset = rtex->dcc_offset;
2343         }
2344
2345         dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset;
2346
2347         rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset,
2348                            rtex->surface.u.legacy.level[level].dcc_fast_clear_size,
2349                            clear_value, R600_COHERENCY_CB_META);
2350 }
2351
2352 /* Set the same micro tile mode as the destination of the last MSAA resolve.
2353  * This allows hitting the MSAA resolve fast path, which requires that both
2354  * src and dst micro tile modes match.
2355  */
2356 static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
2357                                            struct r600_texture *rtex)
2358 {
2359         if (rtex->resource.is_shared ||
2360             rtex->resource.b.b.nr_samples <= 1 ||
2361             rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
2362                 return;
2363
2364         assert(rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
2365         assert(rtex->resource.b.b.last_level == 0);
2366
2367         /* These magic numbers were copied from addrlib. It doesn't use any
2368          * definitions for them either. They are all 2D_TILED_THIN1 modes with
2369          * different bpp and micro tile mode.
2370          */
2371         if (rscreen->chip_class >= CIK) {
2372                 switch (rtex->last_msaa_resolve_target_micro_mode) {
2373                 case RADEON_MICRO_MODE_DISPLAY:
2374                         rtex->surface.u.legacy.tiling_index[0] = 10;
2375                         break;
2376                 case RADEON_MICRO_MODE_THIN:
2377                         rtex->surface.u.legacy.tiling_index[0] = 14;
2378                         break;
2379                 case RADEON_MICRO_MODE_ROTATED:
2380                         rtex->surface.u.legacy.tiling_index[0] = 28;
2381                         break;
2382                 default: /* depth, thick */
2383                         assert(!"unexpected micro mode");
2384                         return;
2385                 }
2386         } else { /* SI */
2387                 switch (rtex->last_msaa_resolve_target_micro_mode) {
2388                 case RADEON_MICRO_MODE_DISPLAY:
2389                         switch (rtex->surface.bpe) {
2390                         case 1:
2391                             rtex->surface.u.legacy.tiling_index[0] = 10;
2392                             break;
2393                         case 2:
2394                             rtex->surface.u.legacy.tiling_index[0] = 11;
2395                             break;
2396                         default: /* 4, 8 */
2397                             rtex->surface.u.legacy.tiling_index[0] = 12;
2398                             break;
2399                         }
2400                         break;
2401                 case RADEON_MICRO_MODE_THIN:
2402                         switch (rtex->surface.bpe) {
2403                         case 1:
2404                                 rtex->surface.u.legacy.tiling_index[0] = 14;
2405                                 break;
2406                         case 2:
2407                                 rtex->surface.u.legacy.tiling_index[0] = 15;
2408                                 break;
2409                         case 4:
2410                                 rtex->surface.u.legacy.tiling_index[0] = 16;
2411                                 break;
2412                         default: /* 8, 16 */
2413                                 rtex->surface.u.legacy.tiling_index[0] = 17;
2414                                 break;
2415                         }
2416                         break;
2417                 default: /* depth, thick */
2418                         assert(!"unexpected micro mode");
2419                         return;
2420                 }
2421         }
2422
2423         rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
2424
2425         p_atomic_inc(&rscreen->dirty_tex_counter);
2426 }
2427
2428 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
2429                                    struct pipe_framebuffer_state *fb,
2430                                    struct r600_atom *fb_state,
2431                                    unsigned *buffers, unsigned *dirty_cbufs,
2432                                    const union pipe_color_union *color)
2433 {
2434         int i;
2435
2436         /* This function is broken in BE, so just disable this path for now */
2437 #ifdef PIPE_ARCH_BIG_ENDIAN
2438         return;
2439 #endif
2440
2441         if (rctx->render_cond)
2442                 return;
2443
2444         for (i = 0; i < fb->nr_cbufs; i++) {
2445                 struct r600_texture *tex;
2446                 unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
2447
2448                 if (!fb->cbufs[i])
2449                         continue;
2450
2451                 /* if this colorbuffer is not being cleared */
2452                 if (!(*buffers & clear_bit))
2453                         continue;
2454
2455                 tex = (struct r600_texture *)fb->cbufs[i]->texture;
2456
2457                 /* the clear is allowed if all layers are bound */
2458                 if (fb->cbufs[i]->u.tex.first_layer != 0 ||
2459                     fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) {
2460                         continue;
2461                 }
2462
2463                 /* cannot clear mipmapped textures */
2464                 if (fb->cbufs[i]->texture->last_level != 0) {
2465                         continue;
2466                 }
2467
2468                 /* only supported on tiled surfaces */
2469                 if (tex->surface.is_linear) {
2470                         continue;
2471                 }
2472
2473                 /* shared textures can't use fast clear without an explicit flush,
2474                  * because there is no way to communicate the clear color among
2475                  * all clients
2476                  */
2477                 if (tex->resource.is_shared &&
2478                     !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
2479                         continue;
2480
2481                 /* fast color clear with 1D tiling doesn't work on old kernels and CIK */
2482                 if (rctx->chip_class == CIK &&
2483                     tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
2484                     rctx->screen->info.drm_major == 2 &&
2485                     rctx->screen->info.drm_minor < 38) {
2486                         continue;
2487                 }
2488
2489                 /* Fast clear is the most appropriate place to enable DCC for
2490                  * displayable surfaces.
2491                  */
2492                 if (rctx->chip_class >= VI &&
2493                     !(rctx->screen->debug_flags & DBG_NO_DCC_FB)) {
2494                         vi_separate_dcc_try_enable(rctx, tex);
2495
2496                         /* RB+ isn't supported with a CMASK-based clear, so all
2497                          * clears are considered to be hypothetically slow
2498                          * clears, which is weighed when determining whether to
2499                          * enable separate DCC.
2500                          */
2501                         if (tex->dcc_gather_statistics &&
2502                             rctx->screen->rbplus_allowed)
2503                                 tex->num_slow_clears++;
2504                 }
2505
2506                 /* Try to clear DCC first, otherwise try CMASK. */
2507                 if (tex->dcc_offset && tex->surface.num_dcc_levels) {
2508                         uint32_t reset_value;
2509                         bool clear_words_needed;
2510
2511                         if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
2512                                 continue;
2513
2514                         if (!vi_get_fast_clear_parameters(fb->cbufs[i]->format,
2515                                                           color, &reset_value,
2516                                                           &clear_words_needed))
2517                                 continue;
2518
2519                         vi_dcc_clear_level(rctx, tex, 0, reset_value);
2520
2521                         if (clear_words_needed)
2522                                 tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
2523                         tex->separate_dcc_dirty = true;
2524                 } else {
2525                         /* 128-bit formats are unusupported */
2526                         if (tex->surface.bpe > 8) {
2527                                 continue;
2528                         }
2529
2530                         /* RB+ doesn't work with CMASK fast clear. */
2531                         if (rctx->screen->rbplus_allowed)
2532                                 continue;
2533
2534                         /* ensure CMASK is enabled */
2535                         r600_texture_alloc_cmask_separate(rctx->screen, tex);
2536                         if (tex->cmask.size == 0) {
2537                                 continue;
2538                         }
2539
2540                         /* Do the fast clear. */
2541                         rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
2542                                            tex->cmask.offset, tex->cmask.size, 0,
2543                                            R600_COHERENCY_CB_META);
2544
2545                         tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
2546                 }
2547
2548                 /* We can change the micro tile mode before a full clear. */
2549                 if (rctx->screen->chip_class >= SI)
2550                         si_set_optimal_micro_tile_mode(rctx->screen, tex);
2551
2552                 evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
2553
2554                 if (dirty_cbufs)
2555                         *dirty_cbufs |= 1 << i;
2556                 rctx->set_atom_dirty(rctx, fb_state, true);
2557                 *buffers &= ~clear_bit;
2558         }
2559 }
2560
2561 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
2562 {
2563         rscreen->b.resource_from_handle = r600_texture_from_handle;
2564         rscreen->b.resource_get_handle = r600_texture_get_handle;
2565 }
2566
2567 void r600_init_context_texture_functions(struct r600_common_context *rctx)
2568 {
2569         rctx->b.create_surface = r600_create_surface;
2570         rctx->b.surface_destroy = r600_surface_destroy;
2571         rctx->b.clear_texture = r600_clear_texture;
2572 }