[mesa.git] / src / gallium / drivers / radeon / r600_texture.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jerome Glisse
25 * Corbin Simpson
26 */
27 #include "r600_pipe_common.h"
28 #include "r600_cs.h"
29 #include "r600_query.h"
30 #include "util/u_format.h"
31 #include "util/u_memory.h"
32 #include "util/u_pack_color.h"
33 #include "util/u_surface.h"
34 #include "os/os_time.h"
35 #include <errno.h>
36 #include <inttypes.h>
37
38 static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
39 struct r600_texture *rtex);
40 static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
41 const struct pipe_resource *templ);
42
43
44 bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
45 struct r600_texture *rdst,
46 unsigned dst_level, unsigned dstx,
47 unsigned dsty, unsigned dstz,
48 struct r600_texture *rsrc,
49 unsigned src_level,
50 const struct pipe_box *src_box)
51 {
52 if (!rctx->dma.cs)
53 return false;
54
55 if (util_format_get_blocksizebits(rdst->resource.b.b.format) !=
56 util_format_get_blocksizebits(rsrc->resource.b.b.format))
57 return false;
58
59 /* MSAA: SDMA can't blit multisampled surfaces; use the 3D path. */
60 if (rsrc->resource.b.b.nr_samples > 1 ||
61 rdst->resource.b.b.nr_samples > 1)
62 return false;
63
64 /* Depth-stencil surfaces:
65 * When dst is linear, the DB->CB copy preserves HTILE.
66 * When dst is tiled, the 3D path must be used to update HTILE.
67 */
68 if (rsrc->is_depth || rdst->is_depth)
69 return false;
70
71 /* DCC as:
72 * src: Use the 3D path. DCC decompression is expensive.
73 * dst: Use the 3D path to compress the pixels with DCC.
74 */
75 if ((rsrc->dcc_offset && rsrc->surface.level[src_level].dcc_enabled) ||
76 (rdst->dcc_offset && rdst->surface.level[dst_level].dcc_enabled))
77 return false;
78
79 /* CMASK as:
80 * src: Both texture and SDMA paths need decompression. Use SDMA.
81 * dst: If overwriting the whole texture, discard CMASK and use
82 * SDMA. Otherwise, use the 3D path.
83 */
84 if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) {
85 /* The CMASK clear is only enabled for the first level. */
86 assert(dst_level == 0);
87 if (!util_texrange_covers_whole_level(&rdst->resource.b.b, dst_level,
88 dstx, dsty, dstz, src_box->width,
89 src_box->height, src_box->depth))
90 return false;
91
92 r600_texture_discard_cmask(rctx->screen, rdst);
93 }
94
95 /* All requirements are met. Prepare textures for SDMA. */
96 if (rsrc->cmask.size && rsrc->dirty_level_mask & (1 << src_level))
97 rctx->b.flush_resource(&rctx->b, &rsrc->resource.b.b);
98
99 assert(!(rsrc->dirty_level_mask & (1 << src_level)));
100 assert(!(rdst->dirty_level_mask & (1 << dst_level)));
101
102 return true;
103 }
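
/* Illustrative usage sketch (hypothetical caller, not part of this file):
 * a DMA blit entry point would typically try the SDMA fast path first and
 * fall back to the 3D engine when the checks above reject the copy.
 *
 *	if (r600_prepare_for_dma_blit(rctx, rdst, dst_level, dstx, dsty, dstz,
 *				      rsrc, src_level, src_box)) {
 *		// SDMA path: both surfaces are now safe to copy directly.
 *		do_sdma_copy(rctx, rdst, dst_level, dstx, dsty, dstz,
 *			     rsrc, src_level, src_box);   // assumed helper
 *	} else {
 *		// 3D path handles MSAA, depth, DCC, and partial CMASK cases.
 *		r600_copy_region_with_blit(&rctx->b, &rdst->resource.b.b,
 *					   dst_level, dstx, dsty, dstz,
 *					   &rsrc->resource.b.b, src_level,
 *					   src_box);
 *	}
 */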
104
105 /* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
106 static void r600_copy_region_with_blit(struct pipe_context *pipe,
107 struct pipe_resource *dst,
108 unsigned dst_level,
109 unsigned dstx, unsigned dsty, unsigned dstz,
110 struct pipe_resource *src,
111 unsigned src_level,
112 const struct pipe_box *src_box)
113 {
114 struct pipe_blit_info blit;
115
116 memset(&blit, 0, sizeof(blit));
117 blit.src.resource = src;
118 blit.src.format = src->format;
119 blit.src.level = src_level;
120 blit.src.box = *src_box;
121 blit.dst.resource = dst;
122 blit.dst.format = dst->format;
123 blit.dst.level = dst_level;
124 blit.dst.box.x = dstx;
125 blit.dst.box.y = dsty;
126 blit.dst.box.z = dstz;
127 blit.dst.box.width = src_box->width;
128 blit.dst.box.height = src_box->height;
129 blit.dst.box.depth = src_box->depth;
130 blit.mask = util_format_get_mask(src->format) &
131 util_format_get_mask(dst->format);
132 blit.filter = PIPE_TEX_FILTER_NEAREST;
133
134 if (blit.mask) {
135 pipe->blit(pipe, &blit);
136 }
137 }
138
139 /* Copy from a full GPU texture to a transfer's staging one. */
140 static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
141 {
142 struct r600_common_context *rctx = (struct r600_common_context*)ctx;
143 struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
144 struct pipe_resource *dst = &rtransfer->staging->b.b;
145 struct pipe_resource *src = transfer->resource;
146
147 if (src->nr_samples > 1) {
148 r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0,
149 src, transfer->level, &transfer->box);
150 return;
151 }
152
153 rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
154 &transfer->box);
155 }
156
157 /* Copy from a transfer's staging texture to a full GPU one. */
158 static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
159 {
160 struct r600_common_context *rctx = (struct r600_common_context*)ctx;
161 struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
162 struct pipe_resource *dst = transfer->resource;
163 struct pipe_resource *src = &rtransfer->staging->b.b;
164 struct pipe_box sbox;
165
166 u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);
167
168 if (dst->nr_samples > 1) {
169 r600_copy_region_with_blit(ctx, dst, transfer->level,
170 transfer->box.x, transfer->box.y, transfer->box.z,
171 src, 0, &sbox);
172 return;
173 }
174
175 rctx->dma_copy(ctx, dst, transfer->level,
176 transfer->box.x, transfer->box.y, transfer->box.z,
177 src, 0, &sbox);
178 }
179
180 static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level,
181 const struct pipe_box *box)
182 {
183 enum pipe_format format = rtex->resource.b.b.format;
184
185 return rtex->surface.level[level].offset +
186 box->z * rtex->surface.level[level].slice_size +
187 box->y / util_format_get_blockheight(format) * rtex->surface.level[level].pitch_bytes +
188 box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
189 }
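
/* Worked example for the offset computation above (illustrative values, not
 * taken from real hardware): a BC1/DXT1 texture has 4x4-pixel blocks of
 * 8 bytes each. With level[0].offset = 0, pitch_bytes = 512,
 * slice_size = 65536 and box = {x=16, y=8, z=1}:
 *
 *   offset = 0
 *          + 1 * 65536                  (slice)
 *          + (8 / 4) * 512  =  1024     (row of blocks)
 *          + (16 / 4) * 8   =    32     (block within the row)
 *          = 66592 bytes from the start of the texture.
 */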
190
191 static int r600_init_surface(struct r600_common_screen *rscreen,
192 struct radeon_surf *surface,
193 const struct pipe_resource *ptex,
194 unsigned array_mode,
195 bool is_flushed_depth)
196 {
197 const struct util_format_description *desc =
198 util_format_description(ptex->format);
199 bool is_depth, is_stencil;
200
201 is_depth = util_format_has_depth(desc);
202 is_stencil = util_format_has_stencil(desc);
203
204 surface->npix_x = ptex->width0;
205 surface->npix_y = ptex->height0;
206 surface->npix_z = ptex->depth0;
207 surface->blk_w = util_format_get_blockwidth(ptex->format);
208 surface->blk_h = util_format_get_blockheight(ptex->format);
209 surface->blk_d = 1;
210 surface->array_size = 1;
211 surface->last_level = ptex->last_level;
212
213 if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
214 ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
215 surface->bpe = 4; /* stencil is allocated separately on evergreen */
216 } else {
217 surface->bpe = util_format_get_blocksize(ptex->format);
218 /* align bytes per element to a dword */
219 if (surface->bpe == 3) {
220 surface->bpe = 4;
221 }
222 }
223
224 surface->nsamples = ptex->nr_samples ? ptex->nr_samples : 1;
225 surface->flags = RADEON_SURF_SET(array_mode, MODE);
226
227 switch (ptex->target) {
228 case PIPE_TEXTURE_1D:
229 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
230 break;
231 case PIPE_TEXTURE_RECT:
232 case PIPE_TEXTURE_2D:
233 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
234 break;
235 case PIPE_TEXTURE_3D:
236 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
237 break;
238 case PIPE_TEXTURE_1D_ARRAY:
239 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
240 surface->array_size = ptex->array_size;
241 break;
242 case PIPE_TEXTURE_CUBE_ARRAY: /* cube array layout like 2d array */
243 assert(ptex->array_size % 6 == 0);
244 case PIPE_TEXTURE_2D_ARRAY:
245 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
246 surface->array_size = ptex->array_size;
247 break;
248 case PIPE_TEXTURE_CUBE:
249 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_CUBEMAP, TYPE);
250 break;
251 case PIPE_BUFFER:
252 default:
253 return -EINVAL;
254 }
255
256 if (!is_flushed_depth && is_depth) {
257 surface->flags |= RADEON_SURF_ZBUFFER;
258
259 if (is_stencil) {
260 surface->flags |= RADEON_SURF_SBUFFER |
261 RADEON_SURF_HAS_SBUFFER_MIPTREE;
262 }
263 }
264 if (rscreen->chip_class >= SI) {
265 surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
266 }
267
268 if (rscreen->chip_class >= VI &&
269 (ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC ||
270 ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT))
271 surface->flags |= RADEON_SURF_DISABLE_DCC;
272
273 if (ptex->bind & PIPE_BIND_SCANOUT) {
274 /* This should catch bugs in gallium users setting incorrect flags. */
275 assert(surface->nsamples == 1 &&
276 surface->array_size == 1 &&
277 surface->npix_z == 1 &&
278 surface->last_level == 0 &&
279 !(surface->flags & RADEON_SURF_Z_OR_SBUFFER));
280
281 surface->flags |= RADEON_SURF_SCANOUT;
282 }
283 return 0;
284 }
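
/* Example of the bpe selection above (illustrative): PIPE_FORMAT_R8G8B8_UNORM
 * has a 3-byte block, so bpe is rounded up to 4 to keep elements dword
 * aligned, while PIPE_FORMAT_Z32_FLOAT_S8X24_UINT on Evergreen+ uses bpe = 4
 * because stencil is allocated as a separate surface.
 */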
285
286 static int r600_setup_surface(struct pipe_screen *screen,
287 struct r600_texture *rtex,
288 unsigned pitch_in_bytes_override,
289 unsigned offset)
290 {
291 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
292 unsigned i;
293 int r;
294
295 r = rscreen->ws->surface_init(rscreen->ws, &rtex->surface);
296 if (r) {
297 return r;
298 }
299
300 rtex->size = rtex->surface.bo_size;
301
302 if (pitch_in_bytes_override && pitch_in_bytes_override != rtex->surface.level[0].pitch_bytes) {
303 /* The old DDX on Evergreen overestimates the alignment for 1D tiling,
304 * and those imported surfaces only have one level.
305 */
306 rtex->surface.level[0].nblk_x = pitch_in_bytes_override / rtex->surface.bpe;
307 rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override;
308 rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y;
309 }
310
311 if (offset) {
312 for (i = 0; i < ARRAY_SIZE(rtex->surface.level); ++i)
313 rtex->surface.level[i].offset += offset;
314 }
315 return 0;
316 }
317
318 static void r600_texture_init_metadata(struct r600_texture *rtex,
319 struct radeon_bo_metadata *metadata)
320 {
321 struct radeon_surf *surface = &rtex->surface;
322
323 memset(metadata, 0, sizeof(*metadata));
324 metadata->microtile = surface->level[0].mode >= RADEON_SURF_MODE_1D ?
325 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
326 metadata->macrotile = surface->level[0].mode >= RADEON_SURF_MODE_2D ?
327 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
328 metadata->pipe_config = surface->pipe_config;
329 metadata->bankw = surface->bankw;
330 metadata->bankh = surface->bankh;
331 metadata->tile_split = surface->tile_split;
332 metadata->mtilea = surface->mtilea;
333 metadata->num_banks = surface->num_banks;
334 metadata->stride = surface->level[0].pitch_bytes;
335 metadata->scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
336 }
337
338 static void r600_dirty_all_framebuffer_states(struct r600_common_screen *rscreen)
339 {
340 p_atomic_inc(&rscreen->dirty_fb_counter);
341 }
342
343 static void r600_eliminate_fast_color_clear(struct r600_common_screen *rscreen,
344 struct r600_texture *rtex)
345 {
346 struct pipe_context *ctx = rscreen->aux_context;
347
348 pipe_mutex_lock(rscreen->aux_context_lock);
349 ctx->flush_resource(ctx, &rtex->resource.b.b);
350 ctx->flush(ctx, NULL, 0);
351 pipe_mutex_unlock(rscreen->aux_context_lock);
352 }
353
354 static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
355 struct r600_texture *rtex)
356 {
357 if (!rtex->cmask.size)
358 return;
359
360 assert(rtex->resource.b.b.nr_samples <= 1);
361
362 /* Disable CMASK. */
363 memset(&rtex->cmask, 0, sizeof(rtex->cmask));
364 rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
365 rtex->dirty_level_mask = 0;
366
367 if (rscreen->chip_class >= SI)
368 rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1);
369 else
370 rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);
371
372 if (rtex->cmask_buffer != &rtex->resource)
373 r600_resource_reference(&rtex->cmask_buffer, NULL);
374
375 /* Notify all contexts about the change. */
376 r600_dirty_all_framebuffer_states(rscreen);
377 p_atomic_inc(&rscreen->compressed_colortex_counter);
378 }
379
380 static bool r600_can_disable_dcc(struct r600_texture *rtex)
381 {
382 /* We can't disable DCC if it can be written by another process. */
383 return rtex->dcc_offset &&
384 (!rtex->resource.is_shared ||
385 !(rtex->resource.external_usage & PIPE_HANDLE_USAGE_WRITE));
386 }
387
388 static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen,
389 struct r600_texture *rtex)
390 {
391 if (!r600_can_disable_dcc(rtex))
392 return false;
393
394 assert(rtex->dcc_separate_buffer == NULL);
395
396 /* Disable DCC. */
397 rtex->dcc_offset = 0;
398
399 /* Notify all contexts about the change. */
400 r600_dirty_all_framebuffer_states(rscreen);
401 return true;
402 }
403
404 /**
405 * Disable DCC for the texture. (first decompress, then discard metadata).
406 *
407 * There is an unresolved multi-context synchronization issue between
408 * screen::aux_context and the current context. If applications do this with
409 * multiple contexts, it's already undefined behavior for them and we don't
410 * have to worry about that. The scenario is:
411 *
412 * If context 1 disables DCC and context 2 has queued commands that write
413 * to the texture via CB with DCC enabled, and the order of operations is
414 * as follows:
415 * context 2 queues draw calls rendering to the texture, but doesn't flush
416 * context 1 disables DCC and flushes
417 * context 1 & 2 reset descriptors and FB state
418 * context 2 flushes (new compressed tiles written by the draw calls)
419 * context 1 & 2 read garbage, because DCC is disabled, yet there are
420 * compressed tiles
421 *
422 * \param rctx the current context if you have one, or rscreen->aux_context
423 * if you don't.
424 */
425 bool r600_texture_disable_dcc(struct r600_common_context *rctx,
426 struct r600_texture *rtex)
427 {
428 struct r600_common_screen *rscreen = rctx->screen;
429
430 if (!r600_can_disable_dcc(rtex))
431 return false;
432
433 if (&rctx->b == rscreen->aux_context)
434 pipe_mutex_lock(rscreen->aux_context_lock);
435
436 /* Decompress DCC. */
437 rctx->decompress_dcc(&rctx->b, rtex);
438 rctx->b.flush(&rctx->b, NULL, 0);
439
440 if (&rctx->b == rscreen->aux_context)
441 pipe_mutex_unlock(rscreen->aux_context_lock);
442
443 return r600_texture_discard_dcc(rscreen, rtex);
444 }
445
446 static void r600_degrade_tile_mode_to_linear(struct r600_common_context *rctx,
447 struct r600_texture *rtex,
448 bool invalidate_storage)
449 {
450 struct pipe_screen *screen = rctx->b.screen;
451 struct r600_texture *new_tex;
452 struct pipe_resource templ = rtex->resource.b.b;
453 unsigned i;
454
455 templ.bind |= PIPE_BIND_LINEAR;
456
457 /* r600g doesn't react to dirty_tex_descriptor_counter */
458 if (rctx->chip_class < SI)
459 return;
460
461 if (rtex->resource.is_shared ||
462 rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED)
463 return;
464
465 /* This fails with MSAA, depth, and compressed textures. */
466 if (r600_choose_tiling(rctx->screen, &templ) !=
467 RADEON_SURF_MODE_LINEAR_ALIGNED)
468 return;
469
470 new_tex = (struct r600_texture*)screen->resource_create(screen, &templ);
471 if (!new_tex)
472 return;
473
474 /* Copy the pixels to the new texture. */
475 if (!invalidate_storage) {
476 for (i = 0; i <= templ.last_level; i++) {
477 struct pipe_box box;
478
479 u_box_3d(0, 0, 0,
480 u_minify(templ.width0, i), u_minify(templ.height0, i),
481 util_max_layer(&templ, i) + 1, &box);
482
483 rctx->dma_copy(&rctx->b, &new_tex->resource.b.b, i, 0, 0, 0,
484 &rtex->resource.b.b, i, &box);
485 }
486 }
487
488 r600_texture_discard_cmask(rctx->screen, rtex);
489 r600_texture_discard_dcc(rctx->screen, rtex);
490
491 /* Replace the structure fields of rtex. */
492 rtex->resource.b.b.bind = templ.bind;
493 pb_reference(&rtex->resource.buf, new_tex->resource.buf);
494 rtex->resource.gpu_address = new_tex->resource.gpu_address;
495 rtex->resource.domains = new_tex->resource.domains;
496 rtex->size = new_tex->size;
497 rtex->surface = new_tex->surface;
498 rtex->non_disp_tiling = new_tex->non_disp_tiling;
499 rtex->cb_color_info = new_tex->cb_color_info;
500 rtex->cmask = new_tex->cmask; /* needed even without CMASK */
501
502 assert(!rtex->htile_buffer);
503 assert(!rtex->cmask.size);
504 assert(!rtex->fmask.size);
505 assert(!rtex->dcc_offset);
506 assert(!rtex->is_depth);
507
508 r600_texture_reference(&new_tex, NULL);
509
510 r600_dirty_all_framebuffer_states(rctx->screen);
511 p_atomic_inc(&rctx->screen->dirty_tex_descriptor_counter);
512 }
513
514 static boolean r600_texture_get_handle(struct pipe_screen* screen,
515 struct pipe_context *ctx,
516 struct pipe_resource *resource,
517 struct winsys_handle *whandle,
518 unsigned usage)
519 {
520 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
521 struct r600_common_context *aux_context =
522 (struct r600_common_context*)rscreen->aux_context;
523 struct r600_resource *res = (struct r600_resource*)resource;
524 struct r600_texture *rtex = (struct r600_texture*)resource;
525 struct radeon_bo_metadata metadata;
526 bool update_metadata = false;
527
528 /* This is not supported now, but it might be required for OpenCL
529 * interop in the future.
530 */
531 if (resource->target != PIPE_BUFFER &&
532 (resource->nr_samples > 1 || rtex->is_depth))
533 return false;
534
535 if (resource->target != PIPE_BUFFER) {
536 /* Since shader image stores don't support DCC on VI,
537 * disable it for external clients that want write
538 * access.
539 */
540 if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
541 if (r600_texture_disable_dcc(aux_context, rtex))
542 update_metadata = true;
543 }
544
545 if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
546 rtex->cmask.size) {
547 /* Eliminate fast clear (both CMASK and DCC) */
548 r600_eliminate_fast_color_clear(rscreen, rtex);
549
550 /* Disable CMASK if flush_resource isn't going
551 * to be called.
552 */
553 r600_texture_discard_cmask(rscreen, rtex);
554 }
555
556 /* Set metadata. */
557 if (!res->is_shared || update_metadata) {
558 r600_texture_init_metadata(rtex, &metadata);
559 if (rscreen->query_opaque_metadata)
560 rscreen->query_opaque_metadata(rscreen, rtex,
561 &metadata);
562
563 rscreen->ws->buffer_set_metadata(res->buf, &metadata);
564 }
565 }
566
567 if (res->is_shared) {
568 /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
569 * doesn't set it.
570 */
571 res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
572 if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
573 res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
574 } else {
575 res->is_shared = true;
576 res->external_usage = usage;
577 }
578
579 return rscreen->ws->buffer_get_handle(res->buf,
580 rtex->surface.level[0].pitch_bytes,
581 rtex->surface.level[0].offset,
582 rtex->surface.level[0].slice_size,
583 whandle);
584 }
585
586 static void r600_texture_destroy(struct pipe_screen *screen,
587 struct pipe_resource *ptex)
588 {
589 struct r600_texture *rtex = (struct r600_texture*)ptex;
590 struct r600_resource *resource = &rtex->resource;
591
592 r600_texture_reference(&rtex->flushed_depth_texture, NULL);
593
594 r600_resource_reference(&rtex->htile_buffer, NULL);
595 if (rtex->cmask_buffer != &rtex->resource) {
596 r600_resource_reference(&rtex->cmask_buffer, NULL);
597 }
598 pb_reference(&resource->buf, NULL);
599 r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
600 r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
601 FREE(rtex);
602 }
603
604 static const struct u_resource_vtbl r600_texture_vtbl;
605
606 /* The number of samples can be specified independently of the texture. */
607 void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
608 struct r600_texture *rtex,
609 unsigned nr_samples,
610 struct r600_fmask_info *out)
611 {
612 /* FMASK is allocated like an ordinary texture. */
613 struct radeon_surf fmask = rtex->surface;
614
615 memset(out, 0, sizeof(*out));
616
617 fmask.bo_alignment = 0;
618 fmask.bo_size = 0;
619 fmask.nsamples = 1;
620 fmask.flags |= RADEON_SURF_FMASK;
621
622 /* Force 2D tiling if it wasn't set. This may occur when creating
623 * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
624 * destination buffer must have an FMASK too. */
625 fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
626 fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
627
628 if (rscreen->chip_class >= SI) {
629 fmask.flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
630 }
631
632 switch (nr_samples) {
633 case 2:
634 case 4:
635 fmask.bpe = 1;
636 if (rscreen->chip_class <= CAYMAN) {
637 fmask.bankh = 4;
638 }
639 break;
640 case 8:
641 fmask.bpe = 4;
642 break;
643 default:
644 R600_ERR("Invalid sample count for FMASK allocation.\n");
645 return;
646 }
647
648 /* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
649 * This can be fixed by writing a separate FMASK allocator specifically
650 * for R600-R700 ASICs. */
651 if (rscreen->chip_class <= R700) {
652 fmask.bpe *= 2;
653 }
654
655 if (rscreen->ws->surface_init(rscreen->ws, &fmask)) {
656 R600_ERR("Got error in surface_init while allocating FMASK.\n");
657 return;
658 }
659
660 assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);
661
662 out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64;
663 if (out->slice_tile_max)
664 out->slice_tile_max -= 1;
665
666 out->tile_mode_index = fmask.tiling_index[0];
667 out->pitch_in_pixels = fmask.level[0].nblk_x;
668 out->bank_height = fmask.bankh;
669 out->alignment = MAX2(256, fmask.bo_alignment);
670 out->size = fmask.bo_size;
671 }
672
673 static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
674 struct r600_texture *rtex)
675 {
676 r600_texture_get_fmask_info(rscreen, rtex,
677 rtex->resource.b.b.nr_samples, &rtex->fmask);
678
679 rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment);
680 rtex->size = rtex->fmask.offset + rtex->fmask.size;
681 }
682
683 void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
684 struct r600_texture *rtex,
685 struct r600_cmask_info *out)
686 {
687 unsigned cmask_tile_width = 8;
688 unsigned cmask_tile_height = 8;
689 unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
690 unsigned element_bits = 4;
691 unsigned cmask_cache_bits = 1024;
692 unsigned num_pipes = rscreen->info.num_tile_pipes;
693 unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
694
695 unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
696 unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
697 unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
698 unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
699 unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
700
701 unsigned pitch_elements = align(rtex->surface.npix_x, macro_tile_width);
702 unsigned height = align(rtex->surface.npix_y, macro_tile_height);
703
704 unsigned base_align = num_pipes * pipe_interleave_bytes;
705 unsigned slice_bytes =
706 ((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;
707
708 assert(macro_tile_width % 128 == 0);
709 assert(macro_tile_height % 128 == 0);
710
711 out->pitch = pitch_elements;
712 out->height = height;
713 out->xalign = macro_tile_width;
714 out->yalign = macro_tile_height;
715 out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
716 out->alignment = MAX2(256, base_align);
717 out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
718 align(slice_bytes, base_align);
719 }
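
/* Worked example for the pre-SI CMASK math above (assumed inputs, purely
 * illustrative): with num_tile_pipes = 2 and pipe_interleave_bytes = 256:
 *   elements_per_macro_tile = (1024 / 4) * 2        = 512
 *   pixels_per_macro_tile   = 512 * 64              = 32768
 *   sqrt(32768) ~= 181  ->  macro_tile_width = 256, macro_tile_height = 128
 * For a 1920x1080 surface:
 *   pitch_elements = align(1920, 256) = 2048
 *   height         = align(1080, 128) = 1152
 *   slice_bytes    = ((2048 * 1152 * 4 + 7) / 8) / 64 = 18432
 *   slice_tile_max = (2048 * 1152) / (128 * 128) - 1  = 143
 *   alignment      = MAX2(256, 2 * 256)               = 512
 */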
720
721 static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
722 struct r600_texture *rtex,
723 struct r600_cmask_info *out)
724 {
725 unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
726 unsigned num_pipes = rscreen->info.num_tile_pipes;
727 unsigned cl_width, cl_height;
728
729 switch (num_pipes) {
730 case 2:
731 cl_width = 32;
732 cl_height = 16;
733 break;
734 case 4:
735 cl_width = 32;
736 cl_height = 32;
737 break;
738 case 8:
739 cl_width = 64;
740 cl_height = 32;
741 break;
742 case 16: /* Hawaii */
743 cl_width = 64;
744 cl_height = 64;
745 break;
746 default:
747 assert(0);
748 return;
749 }
750
751 unsigned base_align = num_pipes * pipe_interleave_bytes;
752
753 unsigned width = align(rtex->surface.npix_x, cl_width*8);
754 unsigned height = align(rtex->surface.npix_y, cl_height*8);
755 unsigned slice_elements = (width * height) / (8*8);
756
757 /* Each element of CMASK is a nibble. */
758 unsigned slice_bytes = slice_elements / 2;
759
760 out->pitch = width;
761 out->height = height;
762 out->xalign = cl_width * 8;
763 out->yalign = cl_height * 8;
764 out->slice_tile_max = (width * height) / (128*128);
765 if (out->slice_tile_max)
766 out->slice_tile_max -= 1;
767
768 out->alignment = MAX2(256, base_align);
769 out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
770 align(slice_bytes, base_align);
771 }
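
/* Worked example for the SI+ CMASK math above (assumed inputs, purely
 * illustrative): with num_tile_pipes = 8 and pipe_interleave_bytes = 256,
 * the switch gives cl_width = 64 and cl_height = 32. For a 1920x1080
 * surface:
 *   width          = align(1920, 64 * 8)       = 2048
 *   height         = align(1080, 32 * 8)       = 1280
 *   slice_elements = (2048 * 1280) / 64        = 40960
 *   slice_bytes    = 40960 / 2                 = 20480  (one nibble each)
 *   slice_tile_max = (2048 * 1280) / 16384 - 1 = 159
 *   alignment      = MAX2(256, 8 * 256)        = 2048
 *   size per layer = align(20480, 2048)        = 20480
 */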
772
773 static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
774 struct r600_texture *rtex)
775 {
776 if (rscreen->chip_class >= SI) {
777 si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
778 } else {
779 r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
780 }
781
782 rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
783 rtex->size = rtex->cmask.offset + rtex->cmask.size;
784
785 if (rscreen->chip_class >= SI)
786 rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
787 else
788 rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
789 }
790
791 static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
792 struct r600_texture *rtex)
793 {
794 if (rtex->cmask_buffer)
795 return;
796
797 assert(rtex->cmask.size == 0);
798
799 if (rscreen->chip_class >= SI) {
800 si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
801 } else {
802 r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
803 }
804
805 rtex->cmask_buffer = (struct r600_resource *)
806 pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
807 PIPE_USAGE_DEFAULT, rtex->cmask.size);
808 if (rtex->cmask_buffer == NULL) {
809 rtex->cmask.size = 0;
810 return;
811 }
812
813 /* update colorbuffer state bits */
814 rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
815
816 if (rscreen->chip_class >= SI)
817 rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
818 else
819 rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
820
821 p_atomic_inc(&rscreen->compressed_colortex_counter);
822 }
823
824 static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
825 struct r600_texture *rtex)
826 {
827 unsigned cl_width, cl_height, width, height;
828 unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
829 unsigned num_pipes = rscreen->info.num_tile_pipes;
830
831 if (rscreen->chip_class <= EVERGREEN &&
832 rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
833 return 0;
834
835 /* HW bug on R6xx. */
836 if (rscreen->chip_class == R600 &&
837 (rtex->surface.level[0].npix_x > 7680 ||
838 rtex->surface.level[0].npix_y > 7680))
839 return 0;
840
841 /* HTILE is broken with 1D tiling on old kernels and CIK. */
842 if (rscreen->chip_class >= CIK &&
843 rtex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
844 rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
845 return 0;
846
847 /* Overalign HTILE on P2 configs to work around GPU hangs in
848 * piglit/depthstencil-render-miplevels 585.
849 *
850 * This has been confirmed to help Kabini & Stoney, where the hangs
851 * are always reproducible. I think I have seen the test hang
852 * on Carrizo too, though it was very rare there.
853 */
854 if (rscreen->chip_class >= CIK && num_pipes < 4)
855 num_pipes = 4;
856
857 switch (num_pipes) {
858 case 1:
859 cl_width = 32;
860 cl_height = 16;
861 break;
862 case 2:
863 cl_width = 32;
864 cl_height = 32;
865 break;
866 case 4:
867 cl_width = 64;
868 cl_height = 32;
869 break;
870 case 8:
871 cl_width = 64;
872 cl_height = 64;
873 break;
874 case 16:
875 cl_width = 128;
876 cl_height = 64;
877 break;
878 default:
879 assert(0);
880 return 0;
881 }
882
883 width = align(rtex->surface.npix_x, cl_width * 8);
884 height = align(rtex->surface.npix_y, cl_height * 8);
885
886 slice_elements = (width * height) / (8 * 8);
887 slice_bytes = slice_elements * 4;
888
889 pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
890 base_align = num_pipes * pipe_interleave_bytes;
891
892 rtex->htile.pitch = width;
893 rtex->htile.height = height;
894 rtex->htile.xalign = cl_width * 8;
895 rtex->htile.yalign = cl_height * 8;
896
897 return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
898 align(slice_bytes, base_align);
899 }
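
/* Worked example for the HTILE sizing above (assumed inputs, purely
 * illustrative): with num_pipes = 4 and pipe_interleave_bytes = 256, the
 * switch gives cl_width = 64 and cl_height = 32. For a single-layer
 * 1920x1080 depth buffer:
 *   width       = align(1920, 64 * 8) = 2048
 *   height      = align(1080, 32 * 8) = 1280
 *   slice_bytes = (2048 * 1280 / 64) * 4 = 163840
 *   base_align  = 4 * 256 = 1024
 * so the function returns align(163840, 1024) = 163840 bytes.
 */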
900
901 static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
902 struct r600_texture *rtex)
903 {
904 unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);
905
906 if (!htile_size)
907 return;
908
909 rtex->htile_buffer = (struct r600_resource*)
910 pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
911 PIPE_USAGE_DEFAULT, htile_size);
912 if (rtex->htile_buffer == NULL) {
913 /* This is not a fatal error; we can keep rendering without the
914 * HTILE buffer. */
915 R600_ERR("Failed to create buffer object for htile buffer.\n");
916 } else {
917 r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
918 htile_size, 0, R600_COHERENCY_NONE);
919 }
920 }
921
922 void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
923 {
924 int i;
925
926 fprintf(f, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
927 "blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
928 "bpe=%u, nsamples=%u, flags=0x%x, %s\n",
929 rtex->surface.npix_x, rtex->surface.npix_y,
930 rtex->surface.npix_z, rtex->surface.blk_w,
931 rtex->surface.blk_h, rtex->surface.blk_d,
932 rtex->surface.array_size, rtex->surface.last_level,
933 rtex->surface.bpe, rtex->surface.nsamples,
934 rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format));
935
936 fprintf(f, " Layout: size=%"PRIu64", alignment=%"PRIu64", bankw=%u, "
937 "bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
938 rtex->surface.bo_size, rtex->surface.bo_alignment, rtex->surface.bankw,
939 rtex->surface.bankh, rtex->surface.num_banks, rtex->surface.mtilea,
940 rtex->surface.tile_split, rtex->surface.pipe_config,
941 (rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);
942
943 if (rtex->fmask.size)
944 fprintf(f, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
945 "bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
946 rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
947 rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
948 rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);
949
950 if (rtex->cmask.size)
951 fprintf(f, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch=%u, "
952 "height=%u, xalign=%u, yalign=%u, slice_tile_max=%u\n",
953 rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
954 rtex->cmask.pitch, rtex->cmask.height, rtex->cmask.xalign,
955 rtex->cmask.yalign, rtex->cmask.slice_tile_max);
956
957 if (rtex->htile_buffer)
958 fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, "
959 "xalign=%u, yalign=%u\n",
960 rtex->htile_buffer->b.b.width0,
961 rtex->htile_buffer->buf->alignment, rtex->htile.pitch,
962 rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign);
963
964 if (rtex->dcc_offset) {
965 fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n",
966 rtex->dcc_offset, rtex->surface.dcc_size,
967 rtex->surface.dcc_alignment);
968 for (i = 0; i <= rtex->surface.last_level; i++)
969 fprintf(f, " DCCLevel[%i]: enabled=%u, offset=%"PRIu64", "
970 "fast_clear_size=%"PRIu64"\n",
971 i, rtex->surface.level[i].dcc_enabled,
972 rtex->surface.level[i].dcc_offset,
973 rtex->surface.level[i].dcc_fast_clear_size);
974 }
975
976 for (i = 0; i <= rtex->surface.last_level; i++)
977 fprintf(f, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
978 "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
979 "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
980 i, rtex->surface.level[i].offset,
981 rtex->surface.level[i].slice_size,
982 u_minify(rtex->resource.b.b.width0, i),
983 u_minify(rtex->resource.b.b.height0, i),
984 u_minify(rtex->resource.b.b.depth0, i),
985 rtex->surface.level[i].nblk_x,
986 rtex->surface.level[i].nblk_y,
987 rtex->surface.level[i].nblk_z,
988 rtex->surface.level[i].pitch_bytes,
989 rtex->surface.level[i].mode);
990
991 if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
992 fprintf(f, " StencilLayout: tilesplit=%u\n",
993 rtex->surface.stencil_tile_split);
994 for (i = 0; i <= rtex->surface.last_level; i++) {
995 fprintf(f, " StencilLevel[%i]: offset=%"PRIu64", "
996 "slice_size=%"PRIu64", npix_x=%u, "
997 "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
998 "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
999 i, rtex->surface.stencil_level[i].offset,
1000 rtex->surface.stencil_level[i].slice_size,
1001 u_minify(rtex->resource.b.b.width0, i),
1002 u_minify(rtex->resource.b.b.height0, i),
1003 u_minify(rtex->resource.b.b.depth0, i),
1004 rtex->surface.stencil_level[i].nblk_x,
1005 rtex->surface.stencil_level[i].nblk_y,
1006 rtex->surface.stencil_level[i].nblk_z,
1007 rtex->surface.stencil_level[i].pitch_bytes,
1008 rtex->surface.stencil_level[i].mode);
1009 }
1010 }
1011 }
1012
1013 /* Common processing for r600_texture_create and r600_texture_from_handle */
1014 static struct r600_texture *
1015 r600_texture_create_object(struct pipe_screen *screen,
1016 const struct pipe_resource *base,
1017 unsigned pitch_in_bytes_override,
1018 unsigned offset,
1019 struct pb_buffer *buf,
1020 struct radeon_surf *surface)
1021 {
1022 struct r600_texture *rtex;
1023 struct r600_resource *resource;
1024 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
1025
1026 rtex = CALLOC_STRUCT(r600_texture);
1027 if (!rtex)
1028 return NULL;
1029
1030 resource = &rtex->resource;
1031 resource->b.b = *base;
1032 resource->b.vtbl = &r600_texture_vtbl;
1033 pipe_reference_init(&resource->b.b.reference, 1);
1034 resource->b.b.screen = screen;
1035
1036 /* don't include stencil-only formats which we don't support for rendering */
1037 rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
1038
1039 rtex->surface = *surface;
1040 if (r600_setup_surface(screen, rtex, pitch_in_bytes_override, offset)) {
1041 FREE(rtex);
1042 return NULL;
1043 }
1044
1045 /* Tiled depth textures utilize the non-displayable tile order.
1046 * This must be done after r600_setup_surface.
1047 * Applies to R600-Cayman. */
1048 rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
1049 /* Applies to GCN. */
1050 rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
1051
1052 /* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers
1053 * between frames, so the only thing that can enable separate DCC
1054 * with DRI2 is multiple slow clears within a frame.
1055 */
1056 rtex->ps_draw_ratio = 0;
1057
1058 if (rtex->is_depth) {
1059 if (base->flags & (R600_RESOURCE_FLAG_TRANSFER |
1060 R600_RESOURCE_FLAG_FLUSHED_DEPTH) ||
1061 rscreen->chip_class >= EVERGREEN) {
1062 rtex->can_sample_z = !rtex->surface.depth_adjusted;
1063 rtex->can_sample_s = !rtex->surface.stencil_adjusted;
1064 } else {
1065 if (rtex->resource.b.b.nr_samples <= 1 &&
1066 (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
1067 rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT))
1068 rtex->can_sample_z = true;
1069 }
1070
1071 if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
1072 R600_RESOURCE_FLAG_FLUSHED_DEPTH))) {
1073 rtex->db_compatible = true;
1074
1075 if (!(rscreen->debug_flags & DBG_NO_HYPERZ))
1076 r600_texture_allocate_htile(rscreen, rtex);
1077 }
1078 } else {
1079 if (base->nr_samples > 1) {
1080 if (!buf) {
1081 r600_texture_allocate_fmask(rscreen, rtex);
1082 r600_texture_allocate_cmask(rscreen, rtex);
1083 rtex->cmask_buffer = &rtex->resource;
1084 }
1085 if (!rtex->fmask.size || !rtex->cmask.size) {
1086 FREE(rtex);
1087 return NULL;
1088 }
1089 }
1090
1091 /* Shared textures must always set up DCC here.
1092 * If it's not present, it will be disabled by
1093 * apply_opaque_metadata later.
1094 */
1095 if (rtex->surface.dcc_size &&
1096 (buf || !(rscreen->debug_flags & DBG_NO_DCC)) &&
1097 !(rtex->surface.flags & RADEON_SURF_SCANOUT)) {
1098 /* Reserve space for the DCC buffer. */
1099 rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
1100 rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
1101 }
1102 }
1103
1104 /* Now create the backing buffer. */
1105 if (!buf) {
1106 if (!r600_init_resource(rscreen, resource, rtex->size,
1107 rtex->surface.bo_alignment)) {
1108 FREE(rtex);
1109 return NULL;
1110 }
1111 } else {
1112 resource->buf = buf;
1113 resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf);
1114 resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf);
1115 }
1116
1117 if (rtex->cmask.size) {
1118 /* Initialize the cmask to 0xCC (= compressed state). */
1119 r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
1120 rtex->cmask.offset, rtex->cmask.size,
1121 0xCCCCCCCC, R600_COHERENCY_NONE);
1122 }
1123
1124 /* Initialize DCC only if the texture is not being imported. */
1125 if (!buf && rtex->dcc_offset) {
1126 r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
1127 rtex->dcc_offset,
1128 rtex->surface.dcc_size,
1129 0xFFFFFFFF, R600_COHERENCY_NONE);
1130 }
1131
1132 /* Initialize the CMASK base register value. */
1133 rtex->cmask.base_address_reg =
1134 (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
1135
1136 if (rscreen->debug_flags & DBG_VM) {
1137 fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
1138 rtex->resource.gpu_address,
1139 rtex->resource.gpu_address + rtex->resource.buf->size,
1140 base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1,
1141 base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
1142 }
1143
1144 if (rscreen->debug_flags & DBG_TEX) {
1145 puts("Texture:");
1146 r600_print_texture_info(rtex, stdout);
1147 fflush(stdout);
1148 }
1149
1150 return rtex;
1151 }
1152
1153 static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
1154 const struct pipe_resource *templ)
1155 {
1156 const struct util_format_description *desc = util_format_description(templ->format);
1157 bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING;
1158
1159 /* MSAA resources must be 2D tiled. */
1160 if (templ->nr_samples > 1)
1161 return RADEON_SURF_MODE_2D;
1162
1163 /* Transfer resources should be linear. */
1164 if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
1165 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1166
1167 /* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
1168 if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
1169 (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
1170 (templ->target == PIPE_TEXTURE_2D ||
1171 templ->target == PIPE_TEXTURE_3D))
1172 force_tiling = true;
1173
1174 /* Handle common candidates for the linear mode.
1175 * Compressed textures and DB surfaces must always be tiled.
1176 */
1177 if (!force_tiling && !util_format_is_compressed(templ->format) &&
1178 (!util_format_is_depth_or_stencil(templ->format) ||
1179 templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH)) {
1180 if (rscreen->debug_flags & DBG_NO_TILING)
1181 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1182
1183 /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
1184 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
1185 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1186
1187 /* Cursors are linear on SI.
1188 * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
1189 if (rscreen->chip_class >= SI &&
1190 (templ->bind & PIPE_BIND_CURSOR))
1191 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1192
1193 if (templ->bind & PIPE_BIND_LINEAR)
1194 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1195
1196 /* Textures with a very small height are recommended to be linear. */
1197 if (templ->target == PIPE_TEXTURE_1D ||
1198 templ->target == PIPE_TEXTURE_1D_ARRAY ||
1199 templ->height0 <= 4)
1200 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1201
1202 /* Textures likely to be mapped often. */
1203 if (templ->usage == PIPE_USAGE_STAGING ||
1204 templ->usage == PIPE_USAGE_STREAM)
1205 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1206 }
1207
1208 /* Make small textures 1D tiled. */
1209 if (templ->width0 <= 16 || templ->height0 <= 16 ||
1210 (rscreen->debug_flags & DBG_NO_2D_TILING))
1211 return RADEON_SURF_MODE_1D;
1212
1213 /* The allocator will switch to 1D if needed. */
1214 return RADEON_SURF_MODE_2D;
1215 }
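
/* Illustrative outcomes of the selection above (derived from the checks,
 * assumed templates): a multisampled color buffer always gets
 * RADEON_SURF_MODE_2D; a PIPE_USAGE_STAGING upload texture or a
 * PIPE_BIND_LINEAR resource gets RADEON_SURF_MODE_LINEAR_ALIGNED; a tiny
 * 16x16 mipmap-less texture gets RADEON_SURF_MODE_1D; everything else
 * defaults to RADEON_SURF_MODE_2D.
 */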
1216
1217 struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
1218 const struct pipe_resource *templ)
1219 {
1220 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
1221 struct radeon_surf surface = {0};
1222 int r;
1223
1224 r = r600_init_surface(rscreen, &surface, templ,
1225 r600_choose_tiling(rscreen, templ),
1226 templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
1227 if (r) {
1228 return NULL;
1229 }
1230 r = rscreen->ws->surface_best(rscreen->ws, &surface);
1231 if (r) {
1232 return NULL;
1233 }
1234 return (struct pipe_resource *)r600_texture_create_object(screen, templ, 0,
1235 0, NULL, &surface);
1236 }
1237
1238 static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
1239 const struct pipe_resource *templ,
1240 struct winsys_handle *whandle,
1241 unsigned usage)
1242 {
1243 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
1244 struct pb_buffer *buf = NULL;
1245 unsigned stride = 0, offset = 0;
1246 unsigned array_mode;
1247 struct radeon_surf surface;
1248 int r;
1249 struct radeon_bo_metadata metadata = {};
1250 struct r600_texture *rtex;
1251
1252 /* Support only 2D textures without mipmaps */
1253 if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
1254 templ->depth0 != 1 || templ->last_level != 0)
1255 return NULL;
1256
1257 buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset);
1258 if (!buf)
1259 return NULL;
1260
1261 rscreen->ws->buffer_get_metadata(buf, &metadata);
1262
1263 surface.pipe_config = metadata.pipe_config;
1264 surface.bankw = metadata.bankw;
1265 surface.bankh = metadata.bankh;
1266 surface.tile_split = metadata.tile_split;
1267 surface.mtilea = metadata.mtilea;
1268 surface.num_banks = metadata.num_banks;
1269
1270 if (metadata.macrotile == RADEON_LAYOUT_TILED)
1271 array_mode = RADEON_SURF_MODE_2D;
1272 else if (metadata.microtile == RADEON_LAYOUT_TILED)
1273 array_mode = RADEON_SURF_MODE_1D;
1274 else
1275 array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
1276
1277 r = r600_init_surface(rscreen, &surface, templ, array_mode, false);
1278 if (r) {
1279 return NULL;
1280 }
1281
1282 if (metadata.scanout)
1283 surface.flags |= RADEON_SURF_SCANOUT;
1284
1285 rtex = r600_texture_create_object(screen, templ, stride,
1286 offset, buf, &surface);
1287 if (!rtex)
1288 return NULL;
1289
1290 rtex->resource.is_shared = true;
1291 rtex->resource.external_usage = usage;
1292
1293 if (rscreen->apply_opaque_metadata)
1294 rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
1295
1296 return &rtex->resource.b.b;
1297 }
1298
1299 bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
1300 struct pipe_resource *texture,
1301 struct r600_texture **staging)
1302 {
1303 struct r600_texture *rtex = (struct r600_texture*)texture;
1304 struct pipe_resource resource;
1305 struct r600_texture **flushed_depth_texture = staging ?
1306 staging : &rtex->flushed_depth_texture;
1307 enum pipe_format pipe_format = texture->format;
1308
1309 if (!staging) {
1310 if (rtex->flushed_depth_texture)
1311 return true; /* it's ready */
1312
1313 if (!rtex->can_sample_z && rtex->can_sample_s) {
1314 switch (pipe_format) {
1315 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1316 /* Save memory by not allocating the S plane. */
1317 pipe_format = PIPE_FORMAT_Z32_FLOAT;
1318 break;
1319 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1320 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1321 /* Save memory bandwidth by not copying the
1322 * stencil part during flush.
1323 *
1324 * This potentially increases memory bandwidth
1325 * if an application uses both Z and S texturing
1326 * simultaneously (a flushed Z24S8 texture
1327 * would be stored compactly), but how often
1328 * does that really happen?
1329 */
1330 pipe_format = PIPE_FORMAT_Z24X8_UNORM;
1331 break;
1332 default:;
1333 }
1334 } else if (!rtex->can_sample_s && rtex->can_sample_z) {
1335 assert(util_format_has_stencil(util_format_description(pipe_format)));
1336
1337 /* DB->CB copies to an 8bpp surface don't work. */
1338 pipe_format = PIPE_FORMAT_X24S8_UINT;
1339 }
1340 }
1341
1342 resource.target = texture->target;
1343 resource.format = pipe_format;
1344 resource.width0 = texture->width0;
1345 resource.height0 = texture->height0;
1346 resource.depth0 = texture->depth0;
1347 resource.array_size = texture->array_size;
1348 resource.last_level = texture->last_level;
1349 resource.nr_samples = texture->nr_samples;
1350 resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
1351 resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
1352 resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;
1353
1354 if (staging)
1355 resource.flags |= R600_RESOURCE_FLAG_TRANSFER;
1356
1357 *flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource);
1358 if (*flushed_depth_texture == NULL) {
1359 R600_ERR("failed to create temporary texture to hold flushed depth\n");
1360 return false;
1361 }
1362
1363 (*flushed_depth_texture)->non_disp_tiling = false;
1364 return true;
1365 }
1366
1367 /**
1368 * Initialize the pipe_resource descriptor to be of the same size as the box,
1369 * which is supposed to hold a subregion of the texture "orig" at the given
1370 * mipmap level.
1371 */
1372 static void r600_init_temp_resource_from_box(struct pipe_resource *res,
1373 struct pipe_resource *orig,
1374 const struct pipe_box *box,
1375 unsigned level, unsigned flags)
1376 {
1377 memset(res, 0, sizeof(*res));
1378 res->format = orig->format;
1379 res->width0 = box->width;
1380 res->height0 = box->height;
1381 res->depth0 = 1;
1382 res->array_size = 1;
1383 res->usage = flags & R600_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
1384 res->flags = flags;
1385
1386 /* We must set the correct texture target and dimensions for a 3D box. */
1387 if (box->depth > 1 && util_max_layer(orig, level) > 0) {
1388 res->target = PIPE_TEXTURE_2D_ARRAY;
1389 res->array_size = box->depth;
1390 } else {
1391 res->target = PIPE_TEXTURE_2D;
1392 }
1393 }
1394
1395 static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
1396 struct r600_texture *rtex,
1397 unsigned transfer_usage,
1398 const struct pipe_box *box)
1399 {
1400 /* r600g doesn't react to dirty_tex_descriptor_counter */
1401 return rscreen->chip_class >= SI &&
1402 !rtex->resource.is_shared &&
1403 !(transfer_usage & PIPE_TRANSFER_READ) &&
1404 rtex->resource.b.b.last_level == 0 &&
1405 util_texrange_covers_whole_level(&rtex->resource.b.b, 0,
1406 box->x, box->y, box->z,
1407 box->width, box->height,
1408 box->depth);
1409 }
1410
1411 static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
1412 struct r600_texture *rtex)
1413 {
1414 struct r600_common_screen *rscreen = rctx->screen;
1415
1416 /* There is no point in discarding depth and tiled buffers. */
1417 assert(!rtex->is_depth);
1418 assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED);
1419
1420 /* Reallocate the buffer in the same pipe_resource. */
1421 r600_init_resource(rscreen, &rtex->resource, rtex->size,
1422 rtex->surface.bo_alignment);
1423
1424 /* Initialize the CMASK base address (needed even without CMASK). */
1425 rtex->cmask.base_address_reg =
1426 (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
1427
1428 r600_dirty_all_framebuffer_states(rscreen);
1429 p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
1430
1431 rctx->num_alloc_tex_transfer_bytes += rtex->size;
1432 }
1433
1434 static void *r600_texture_transfer_map(struct pipe_context *ctx,
1435 struct pipe_resource *texture,
1436 unsigned level,
1437 unsigned usage,
1438 const struct pipe_box *box,
1439 struct pipe_transfer **ptransfer)
1440 {
1441 struct r600_common_context *rctx = (struct r600_common_context*)ctx;
1442 struct r600_texture *rtex = (struct r600_texture*)texture;
1443 struct r600_transfer *trans;
1444 struct r600_resource *buf;
1445 unsigned offset = 0;
1446 char *map;
1447 bool use_staging_texture = false;
1448
1449 assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER));
1450
1451 /* Depth textures use staging unconditionally. */
1452 if (!rtex->is_depth) {
1453 /* Degrade the tile mode if we get too many transfers on APUs.
1454 * On dGPUs, the staging texture is always faster.
1455 * Only count uploads that are at least 4x4 pixels large.
1456 */
1457 if (!rctx->screen->info.has_dedicated_vram &&
1458 level == 0 &&
1459 box->width >= 4 && box->height >= 4 &&
1460 p_atomic_inc_return(&rtex->num_level0_transfers) == 10) {
1461 bool can_invalidate =
1462 r600_can_invalidate_texture(rctx->screen, rtex,
1463 usage, box);
1464
1465 r600_degrade_tile_mode_to_linear(rctx, rtex,
1466 can_invalidate);
1467 }
1468
1469 /* Tiled textures need to be converted to a linear layout for CPU
1470 * access. The staging texture is always linear and is placed in GART.
1471 *
1472 * Reading from VRAM is slow, so always use the staging texture in
1473 * this case.
1474 *
1475 * Use the staging texture for uploads if the underlying BO
1476 * is busy.
1477 */
1478 if (rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D)
1479 use_staging_texture = true;
1480 else if (usage & PIPE_TRANSFER_READ)
1481 use_staging_texture = (rtex->resource.domains &
1482 RADEON_DOMAIN_VRAM) != 0;
1483 /* Write & linear only: */
1484 else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf,
1485 RADEON_USAGE_READWRITE) ||
1486 !rctx->ws->buffer_wait(rtex->resource.buf, 0,
1487 RADEON_USAGE_READWRITE)) {
1488 /* It's busy. */
1489 if (r600_can_invalidate_texture(rctx->screen, rtex,
1490 usage, box))
1491 r600_texture_invalidate_storage(rctx, rtex);
1492 else
1493 use_staging_texture = true;
1494 }
1495 }
1496
1497 trans = CALLOC_STRUCT(r600_transfer);
1498 if (!trans)
1499 return NULL;
1500 trans->transfer.resource = texture;
1501 trans->transfer.level = level;
1502 trans->transfer.usage = usage;
1503 trans->transfer.box = *box;
1504
1505 if (rtex->is_depth) {
1506 struct r600_texture *staging_depth;
1507
1508 if (rtex->resource.b.b.nr_samples > 1) {
1509 /* MSAA depth buffers need to be converted to single sample buffers.
1510 *
1511 * Mapping MSAA depth buffers can occur if ReadPixels is called
1512 * with a multisample GLX visual.
1513 *
1514 * First downsample the depth buffer to a temporary texture,
1515 * then decompress the temporary one to staging.
1516 *
1517 * Only the region being mapped is transferred.
1518 */
1519 struct pipe_resource resource;
1520
1521 r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
1522
1523 if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
1524 R600_ERR("failed to create temporary texture to hold untiled copy\n");
1525 FREE(trans);
1526 return NULL;
1527 }
1528
1529 if (usage & PIPE_TRANSFER_READ) {
1530 struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
1531 if (!temp) {
1532 R600_ERR("failed to create a temporary depth texture\n");
1533 FREE(trans);
1534 return NULL;
1535 }
1536
1537 r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
1538 rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
1539 0, 0, 0, box->depth, 0, 0);
1540 pipe_resource_reference(&temp, NULL);
1541 }
1542 }
1543 else {
1544 /* XXX: only readback the rectangle which is being mapped? */
1545 /* XXX: when discard is true, no need to read back from depth texture */
1546 if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
1547 R600_ERR("failed to create temporary texture to hold untiled copy\n");
1548 FREE(trans);
1549 return NULL;
1550 }
1551
1552 rctx->blit_decompress_depth(ctx, rtex, staging_depth,
1553 level, level,
1554 box->z, box->z + box->depth - 1,
1555 0, 0);
1556
1557 offset = r600_texture_get_offset(staging_depth, level, box);
1558 }
1559
1560 trans->transfer.stride = staging_depth->surface.level[level].pitch_bytes;
1561 trans->transfer.layer_stride = staging_depth->surface.level[level].slice_size;
1562 trans->staging = (struct r600_resource*)staging_depth;
1563 buf = trans->staging;
1564 } else if (use_staging_texture) {
1565 struct pipe_resource resource;
1566 struct r600_texture *staging;
1567
1568 r600_init_temp_resource_from_box(&resource, texture, box, level,
1569 R600_RESOURCE_FLAG_TRANSFER);
1570 resource.usage = (usage & PIPE_TRANSFER_READ) ?
1571 PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
1572
1573 /* Create the temporary texture. */
1574 staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
1575 if (!staging) {
1576 R600_ERR("failed to create temporary texture to hold untiled copy\n");
1577 FREE(trans);
1578 return NULL;
1579 }
1580 trans->staging = &staging->resource;
1581 trans->transfer.stride = staging->surface.level[0].pitch_bytes;
1582 trans->transfer.layer_stride = staging->surface.level[0].slice_size;
1583
1584 if (usage & PIPE_TRANSFER_READ)
1585 r600_copy_to_staging_texture(ctx, trans);
1586 else
1587 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
1588
1589 buf = trans->staging;
1590 } else {
1591 /* the resource is mapped directly */
1592 trans->transfer.stride = rtex->surface.level[level].pitch_bytes;
1593 trans->transfer.layer_stride = rtex->surface.level[level].slice_size;
1594 offset = r600_texture_get_offset(rtex, level, box);
1595 buf = &rtex->resource;
1596 }
1597
1598 if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
1599 r600_resource_reference(&trans->staging, NULL);
1600 FREE(trans);
1601 return NULL;
1602 }
1603
1604 *ptransfer = &trans->transfer;
1605 return map + offset;
1606 }
1607
1608 static void r600_texture_transfer_unmap(struct pipe_context *ctx,
1609 struct pipe_transfer* transfer)
1610 {
1611 struct r600_common_context *rctx = (struct r600_common_context*)ctx;
1612 struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
1613 struct pipe_resource *texture = transfer->resource;
1614 struct r600_texture *rtex = (struct r600_texture*)texture;
1615
1616 if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
1617 if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) {
1618 ctx->resource_copy_region(ctx, texture, transfer->level,
1619 transfer->box.x, transfer->box.y, transfer->box.z,
1620 &rtransfer->staging->b.b, transfer->level,
1621 &transfer->box);
1622 } else {
1623 r600_copy_from_staging_texture(ctx, rtransfer);
1624 }
1625 }
1626
1627 if (rtransfer->staging) {
1628 rctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size;
1629 r600_resource_reference(&rtransfer->staging, NULL);
1630 }
1631
1632 /* Heuristic for {upload, draw, upload, draw, ..}:
1633 *
1634 * Flush the gfx IB if we've allocated too much texture storage.
1635 *
1636 * The idea is that we don't want to build IBs that use too much
1637 * memory and put pressure on the kernel memory manager. We also
1638 * want temporary and invalidated buffers to go idle ASAP to
1639 * decrease the total memory usage or make them reusable. The memory
1640 * usage will be slightly higher than given here because of the buffer
1641 * cache in the winsys.
1642 *
1643 * The result is that the kernel memory manager is never a bottleneck.
1644 */
1645 if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.gart_size / 4) {
1646 rctx->gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
1647 rctx->num_alloc_tex_transfer_bytes = 0;
1648 }
1649
1650 FREE(transfer);
1651 }
1652
1653 static const struct u_resource_vtbl r600_texture_vtbl =
1654 {
1655 NULL, /* get_handle */
1656 r600_texture_destroy, /* resource_destroy */
1657 r600_texture_transfer_map, /* transfer_map */
1658 u_default_transfer_flush_region, /* transfer_flush_region */
1659 r600_texture_transfer_unmap, /* transfer_unmap */
1660 };
1661
1662 struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
1663 struct pipe_resource *texture,
1664 const struct pipe_surface *templ,
1665 unsigned width, unsigned height)
1666 {
1667 struct r600_texture *rtex = (struct r600_texture*)texture;
1668 struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
1669
1670 if (!surface)
1671 return NULL;
1672
1673 assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level));
1674 assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
1675
1676 pipe_reference_init(&surface->base.reference, 1);
1677 pipe_resource_reference(&surface->base.texture, texture);
1678 surface->base.context = pipe;
1679 surface->base.format = templ->format;
1680 surface->base.width = width;
1681 surface->base.height = height;
1682 surface->base.u = templ->u;
1683 surface->level_info = &rtex->surface.level[templ->u.tex.level];
1684 return &surface->base;
1685 }
1686
1687 static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
1688 struct pipe_resource *tex,
1689 const struct pipe_surface *templ)
1690 {
1691 unsigned level = templ->u.tex.level;
1692 unsigned width = u_minify(tex->width0, level);
1693 unsigned height = u_minify(tex->height0, level);
1694
1695 if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
1696 const struct util_format_description *tex_desc
1697 = util_format_description(tex->format);
1698 const struct util_format_description *templ_desc
1699 = util_format_description(templ->format);
1700
1701 assert(tex_desc->block.bits == templ_desc->block.bits);
1702
1703 /* Adjust size of surface if and only if the block width or
1704 * height is changed. */
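/* For example, a 4x4-block compressed format viewed through an
 * uncompressed format with the same bits per block ends up with a
 * width/4 x height/4 surface. */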
1705 if (tex_desc->block.width != templ_desc->block.width ||
1706 tex_desc->block.height != templ_desc->block.height) {
1707 unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
1708 unsigned nblks_y = util_format_get_nblocksy(tex->format, height);
1709
1710 width = nblks_x * templ_desc->block.width;
1711 height = nblks_y * templ_desc->block.height;
1712 }
1713 }
1714
1715 return r600_create_surface_custom(pipe, tex, templ, width, height);
1716 }
1717
1718 static void r600_surface_destroy(struct pipe_context *pipe,
1719 struct pipe_surface *surface)
1720 {
1721 struct r600_surface *surf = (struct r600_surface*)surface;
1722 r600_resource_reference(&surf->cb_buffer_fmask, NULL);
1723 r600_resource_reference(&surf->cb_buffer_cmask, NULL);
1724 pipe_resource_reference(&surface->texture, NULL);
1725 FREE(surface);
1726 }
1727
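/* Clear a sub-box of one texture level by wrapping it in a temporary
 * surface and using the depth-stencil or render-target clear path;
 * formats the hardware can't render to (e.g. R9G9B9E5) fall back to
 * util_clear_render_target.
 */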
1728 static void r600_clear_texture(struct pipe_context *pipe,
1729 struct pipe_resource *tex,
1730 unsigned level,
1731 const struct pipe_box *box,
1732 const void *data)
1733 {
1734 struct pipe_screen *screen = pipe->screen;
1735 struct r600_texture *rtex = (struct r600_texture*)tex;
1736 struct pipe_surface tmpl = {{0}};
1737 struct pipe_surface *sf;
1738 const struct util_format_description *desc =
1739 util_format_description(tex->format);
1740
1741 tmpl.format = tex->format;
1742 tmpl.u.tex.first_layer = box->z;
1743 tmpl.u.tex.last_layer = box->z + box->depth - 1;
1744 tmpl.u.tex.level = level;
1745 sf = pipe->create_surface(pipe, tex, &tmpl);
1746 if (!sf)
1747 return;
1748
1749 if (rtex->is_depth) {
1750 unsigned clear;
1751 float depth;
1752 uint8_t stencil = 0;
1753
1754 /* Depth is always present. */
1755 clear = PIPE_CLEAR_DEPTH;
1756 desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
1757
1758 if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
1759 clear |= PIPE_CLEAR_STENCIL;
1760 desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
1761 }
1762
1763 pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil,
1764 box->x, box->y,
1765 box->width, box->height, false);
1766 } else {
1767 union pipe_color_union color;
1768
1769 /* pipe_color_union requires the full vec4 representation. */
1770 if (util_format_is_pure_uint(tex->format))
1771 desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
1772 else if (util_format_is_pure_sint(tex->format))
1773 desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
1774 else
1775 desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
1776
1777 if (screen->is_format_supported(screen, tex->format,
1778 tex->target, 0,
1779 PIPE_BIND_RENDER_TARGET)) {
1780 pipe->clear_render_target(pipe, sf, &color,
1781 box->x, box->y,
1782 box->width, box->height, false);
1783 } else {
1784 /* Software fallback - just for R9G9B9E5_FLOAT */
1785 util_clear_render_target(pipe, sf, &color,
1786 box->x, box->y,
1787 box->width, box->height);
1788 }
1789 }
1790 pipe_surface_reference(&sf, NULL);
1791 }
1792
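/* Translate a format's channel ordering into one of the
 * V_0280A0_SWAP_* component-swap values used by the CB. Returns ~0U
 * if the swizzle can't be expressed as a swap mode.
 */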
1793 unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap)
1794 {
1795 const struct util_format_description *desc = util_format_description(format);
1796
1797 #define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
1798
1799 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
1800 return V_0280A0_SWAP_STD;
1801
1802 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
1803 return ~0U;
1804
1805 switch (desc->nr_channels) {
1806 case 1:
1807 if (HAS_SWIZZLE(0,X))
1808 return V_0280A0_SWAP_STD; /* X___ */
1809 else if (HAS_SWIZZLE(3,X))
1810 return V_0280A0_SWAP_ALT_REV; /* ___X */
1811 break;
1812 case 2:
1813 if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
1814 (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
1815 (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
1816 return V_0280A0_SWAP_STD; /* XY__ */
1817 else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
1818 (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
1819 (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
1820 /* YX__ */
1821 return (do_endian_swap ? V_0280A0_SWAP_STD : V_0280A0_SWAP_STD_REV);
1822 else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
1823 return V_0280A0_SWAP_ALT; /* X__Y */
1824 else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
1825 return V_0280A0_SWAP_ALT_REV; /* Y__X */
1826 break;
1827 case 3:
1828 if (HAS_SWIZZLE(0,X))
1829 return (do_endian_swap ? V_0280A0_SWAP_STD_REV : V_0280A0_SWAP_STD);
1830 else if (HAS_SWIZZLE(0,Z))
1831 return V_0280A0_SWAP_STD_REV; /* ZYX */
1832 break;
1833 case 4:
1834 /* check the middle channels; the 1st and 4th channels can be NONE */
1835 if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) {
1836 return V_0280A0_SWAP_STD; /* XYZW */
1837 } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) {
1838 return V_0280A0_SWAP_STD_REV; /* WZYX */
1839 } else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) {
1840 return V_0280A0_SWAP_ALT; /* ZYXW */
1841 } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) {
1842 /* YZWX */
1843 if (desc->is_array)
1844 return V_0280A0_SWAP_ALT_REV;
1845 else
1846 return (do_endian_swap ? V_0280A0_SWAP_ALT : V_0280A0_SWAP_ALT_REV);
1847 }
1848 break;
1849 }
1850 return ~0U;
1851 }
1852
1853 /* PIPELINE_STAT-BASED DCC ENABLEMENT FOR DISPLAYABLE SURFACES */
1854
1855 static void vi_dcc_clean_up_context_slot(struct r600_common_context *rctx,
1856 int slot)
1857 {
1858 int i;
1859
1860 if (rctx->dcc_stats[slot].query_active)
1861 vi_separate_dcc_stop_query(&rctx->b,
1862 rctx->dcc_stats[slot].tex);
1863
1864 for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats[slot].ps_stats); i++)
1865 if (rctx->dcc_stats[slot].ps_stats[i]) {
1866 rctx->b.destroy_query(&rctx->b,
1867 rctx->dcc_stats[slot].ps_stats[i]);
1868 rctx->dcc_stats[slot].ps_stats[i] = NULL;
1869 }
1870
1871 r600_texture_reference(&rctx->dcc_stats[slot].tex, NULL);
1872 }
1873
1874 /**
1875 * Return the per-context slot where DCC statistics queries for the texture live.
1876 */
1877 static unsigned vi_get_context_dcc_stats_index(struct r600_common_context *rctx,
1878 struct r600_texture *tex)
1879 {
1880 int i, empty_slot = -1;
1881
1882 /* Remove zombie textures (textures kept alive by this array only). */
1883 for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++)
1884 if (rctx->dcc_stats[i].tex &&
1885 rctx->dcc_stats[i].tex->resource.b.b.reference.count == 1)
1886 vi_dcc_clean_up_context_slot(rctx, i);
1887
1888 /* Find the texture. */
1889 for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
1890 /* Return if found. */
1891 if (rctx->dcc_stats[i].tex == tex) {
1892 rctx->dcc_stats[i].last_use_timestamp = os_time_get();
1893 return i;
1894 }
1895
1896 /* Record the first seen empty slot. */
1897 if (empty_slot == -1 && !rctx->dcc_stats[i].tex)
1898 empty_slot = i;
1899 }
1900
1901 /* Not found. Remove the oldest member to make space in the array. */
1902 if (empty_slot == -1) {
1903 int oldest_slot = 0;
1904
1905 /* Find the oldest slot. */
1906 for (i = 1; i < ARRAY_SIZE(rctx->dcc_stats); i++)
1907 if (rctx->dcc_stats[oldest_slot].last_use_timestamp >
1908 rctx->dcc_stats[i].last_use_timestamp)
1909 oldest_slot = i;
1910
1911 /* Clean up the oldest slot. */
1912 vi_dcc_clean_up_context_slot(rctx, oldest_slot);
1913 empty_slot = oldest_slot;
1914 }
1915
1916 /* Add the texture to the new slot. */
1917 r600_texture_reference(&rctx->dcc_stats[empty_slot].tex, tex);
1918 rctx->dcc_stats[empty_slot].last_use_timestamp = os_time_get();
1919 return empty_slot;
1920 }
1921
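/* Create a pipeline-statistics query flagged with
 * R600_QUERY_HW_FLAG_BEGIN_RESUMES, so that begin_query resumes
 * accumulating into the previous results rather than resetting them;
 * the DCC heuristics start/stop it on every colorbuffer bind/unbind.
 */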
1922 static struct pipe_query *
1923 vi_create_resuming_pipestats_query(struct pipe_context *ctx)
1924 {
1925 struct r600_query_hw *query = (struct r600_query_hw*)
1926 ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
1927
1928 query->flags |= R600_QUERY_HW_FLAG_BEGIN_RESUMES;
1929 return (struct pipe_query*)query;
1930 }
1931
1932 /**
1933 * Called when binding a color buffer.
1934 */
1935 void vi_separate_dcc_start_query(struct pipe_context *ctx,
1936 struct r600_texture *tex)
1937 {
1938 struct r600_common_context *rctx = (struct r600_common_context*)ctx;
1939 unsigned i = vi_get_context_dcc_stats_index(rctx, tex);
1940
1941 assert(!rctx->dcc_stats[i].query_active);
1942
1943 if (!rctx->dcc_stats[i].ps_stats[0])
1944 rctx->dcc_stats[i].ps_stats[0] = vi_create_resuming_pipestats_query(ctx);
1945
1946 /* begin or resume the query */
1947 ctx->begin_query(ctx, rctx->dcc_stats[i].ps_stats[0]);
1948 rctx->dcc_stats[i].query_active = true;
1949 }
1950
1951 /**
1952 * Called when unbinding a color buffer.
1953 */
1954 void vi_separate_dcc_stop_query(struct pipe_context *ctx,
1955 struct r600_texture *tex)
1956 {
1957 struct r600_common_context *rctx = (struct r600_common_context*)ctx;
1958 unsigned i = vi_get_context_dcc_stats_index(rctx, tex);
1959
1960 assert(rctx->dcc_stats[i].query_active);
1961 assert(rctx->dcc_stats[i].ps_stats[0]);
1962
1963 /* pause or end the query */
1964 ctx->end_query(ctx, rctx->dcc_stats[i].ps_stats[0]);
1965 rctx->dcc_stats[i].query_active = false;
1966 }
1967
1968 static bool vi_should_enable_separate_dcc(struct r600_texture *tex)
1969 {
1970 /* The minimum number of fullscreen draws per frame that is required
1971 * to enable DCC. */
1972 return tex->ps_draw_ratio + tex->num_slow_clears >= 5;
1973 }
1974
1975 /* Called by fast clear. */
1976 static void vi_separate_dcc_try_enable(struct r600_common_context *rctx,
1977 struct r600_texture *tex)
1978 {
1979 /* The intent is to use this with shared displayable back buffers,
1980 * but it's not strictly limited to them.
1981 */
1982 if (!tex->resource.is_shared ||
1983 !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) ||
1984 tex->resource.b.b.target != PIPE_TEXTURE_2D ||
1985 tex->surface.last_level > 0 ||
1986 !tex->surface.dcc_size)
1987 return;
1988
1989 if (tex->dcc_offset)
1990 return; /* already enabled */
1991
1992 /* Enable the DCC stat gathering. */
1993 if (!tex->dcc_gather_statistics) {
1994 tex->dcc_gather_statistics = true;
1995 vi_separate_dcc_start_query(&rctx->b, tex);
1996 }
1997
1998 if (!vi_should_enable_separate_dcc(tex))
1999 return; /* stats show that DCC decompression is too expensive */
2000
2001 assert(tex->surface.level[0].dcc_enabled);
2002 assert(!tex->dcc_separate_buffer);
2003
2004 r600_texture_discard_cmask(rctx->screen, tex);
2005
2006 /* Get a DCC buffer. */
2007 if (tex->last_dcc_separate_buffer) {
2008 assert(tex->dcc_gather_statistics);
2009 assert(!tex->dcc_separate_buffer);
2010 tex->dcc_separate_buffer = tex->last_dcc_separate_buffer;
2011 tex->last_dcc_separate_buffer = NULL;
2012 } else {
2013 tex->dcc_separate_buffer = (struct r600_resource*)
2014 r600_aligned_buffer_create(rctx->b.screen, 0,
2015 PIPE_USAGE_DEFAULT,
2016 tex->surface.dcc_size,
2017 tex->surface.dcc_alignment);
2018 if (!tex->dcc_separate_buffer)
2019 return;
2020 }
2021
2022 /* dcc_offset is the absolute GPUVM address. */
2023 tex->dcc_offset = tex->dcc_separate_buffer->gpu_address;
2024
2025 /* No need to flag anything since this is called by the fast
2026 * clear path, which already flags the framebuffer state as dirty.
2027 */
2028 }
2029
2030 /**
2031 * Called by pipe_context::flush_resource, the place where DCC decompression
2032 * takes place.
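 *
 * Reads back the oldest query in the per-texture queue (ps_stats[2]),
 * derives the approximate fullscreen-draw ratio from PS invocations,
 * rotates the queue, and disables separate DCC if the ratio is too low.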
2033 */
2034 void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
2035 struct r600_texture *tex)
2036 {
2037 struct r600_common_context *rctx = (struct r600_common_context*)ctx;
2038 struct pipe_query *tmp;
2039 unsigned i = vi_get_context_dcc_stats_index(rctx, tex);
2040 bool query_active = rctx->dcc_stats[i].query_active;
2041 bool disable = false;
2042
2043 if (rctx->dcc_stats[i].ps_stats[2]) {
2044 union pipe_query_result result;
2045
2046 /* Read the results. */
2047 ctx->get_query_result(ctx, rctx->dcc_stats[i].ps_stats[2],
2048 true, &result);
2049 r600_query_hw_reset_buffers(rctx,
2050 (struct r600_query_hw*)
2051 rctx->dcc_stats[i].ps_stats[2]);
2052
2053 /* Compute the approximate number of fullscreen draws. */
2054 tex->ps_draw_ratio =
2055 result.pipeline_statistics.ps_invocations /
2056 (tex->resource.b.b.width0 * tex->resource.b.b.height0);
2057 rctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio;
2058
2059 disable = tex->dcc_separate_buffer &&
2060 !vi_should_enable_separate_dcc(tex);
2061 }
2062
2063 tex->num_slow_clears = 0;
2064
2065 /* stop the statistics query for ps_stats[0] */
2066 if (query_active)
2067 vi_separate_dcc_stop_query(ctx, tex);
2068
2069 /* Move the queries in the queue by one. */
2070 tmp = rctx->dcc_stats[i].ps_stats[2];
2071 rctx->dcc_stats[i].ps_stats[2] = rctx->dcc_stats[i].ps_stats[1];
2072 rctx->dcc_stats[i].ps_stats[1] = rctx->dcc_stats[i].ps_stats[0];
2073 rctx->dcc_stats[i].ps_stats[0] = tmp;
2074
2075 /* create and start a new query as ps_stats[0] */
2076 if (query_active)
2077 vi_separate_dcc_start_query(ctx, tex);
2078
2079 if (disable) {
2080 assert(!tex->last_dcc_separate_buffer);
2081 tex->last_dcc_separate_buffer = tex->dcc_separate_buffer;
2082 tex->dcc_separate_buffer = NULL;
2083 tex->dcc_offset = 0;
2084 /* No need to flag anything since this is called after
2085 * decompression, which re-sets the framebuffer state.
2086 */
2087 }
2088 }
2089
2090 /* FAST COLOR CLEAR */
2091
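/* Pack the clear color in the surface format and store the low 64
 * bits in color_clear_value, which is what the fast clear path uses
 * as the colorbuffer clear color for this surface.
 */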
2092 static void evergreen_set_clear_color(struct r600_texture *rtex,
2093 enum pipe_format surface_format,
2094 const union pipe_color_union *color)
2095 {
2096 union util_color uc;
2097
2098 memset(&uc, 0, sizeof(uc));
2099
2100 if (util_format_is_pure_uint(surface_format)) {
2101 util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
2102 } else if (util_format_is_pure_sint(surface_format)) {
2103 util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
2104 } else {
2105 util_pack_color(color->f, surface_format, &uc);
2106 }
2107
2108 memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
2109 }
2110
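/* Compute the DCC clear code ("reset value") for a fast color clear
 * and report whether clear words are still needed: clear_words_needed
 * stays true unless every channel of the clear color is 0 or 1
 * (0 or max for integer formats), i.e. unless the color can be
 * encoded in DCC alone.
 */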
2111 static void vi_get_fast_clear_parameters(enum pipe_format surface_format,
2112 const union pipe_color_union *color,
2113 uint32_t* reset_value,
2114 bool* clear_words_needed)
2115 {
2116 bool values[4] = {};
2117 int i;
2118 bool main_value = false;
2119 bool extra_value = false;
2120 int extra_channel;
2121 const struct util_format_description *desc = util_format_description(surface_format);
2122
2123 *clear_words_needed = true;
2124 *reset_value = 0x20202020U;
2125
2126 /* If we want to clear without needing a fast clear eliminate step, we
2127 * can set each channel to 0 or 1 (or 0/max for integer formats). We
2128 * have two sets of flags, one for the last or first channel (extra) and
2129 * one for the other channels (main).
2130 */
2131
2132 if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
2133 surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
2134 surface_format == PIPE_FORMAT_B5G6R5_SRGB) {
2135 extra_channel = -1;
2136 } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
2137 if (r600_translate_colorswap(surface_format, false) <= 1)
2138 extra_channel = desc->nr_channels - 1;
2139 else
2140 extra_channel = 0;
2141 } else
2142 return;
2143
2144 for (i = 0; i < 4; ++i) {
2145 int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
2146
2147 if (desc->swizzle[i] < PIPE_SWIZZLE_X ||
2148 desc->swizzle[i] > PIPE_SWIZZLE_W)
2149 continue;
2150
2151 if (util_format_is_pure_sint(surface_format)) {
2152 values[i] = color->i[i] != 0;
2153 if (color->i[i] != 0 && color->i[i] != INT32_MAX)
2154 return;
2155 } else if (util_format_is_pure_uint(surface_format)) {
2156 values[i] = color->ui[i] != 0U;
2157 if (color->ui[i] != 0U && color->ui[i] != UINT32_MAX)
2158 return;
2159 } else {
2160 values[i] = color->f[i] != 0.0F;
2161 if (color->f[i] != 0.0F && color->f[i] != 1.0F)
2162 return;
2163 }
2164
2165 if (index == extra_channel)
2166 extra_value = values[i];
2167 else
2168 main_value = values[i];
2169 }
2170
2171 for (int i = 0; i < 4; ++i)
2172 if (values[i] != main_value &&
2173 desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
2174 desc->swizzle[i] >= PIPE_SWIZZLE_X &&
2175 desc->swizzle[i] <= PIPE_SWIZZLE_W)
2176 return;
2177
2178 *clear_words_needed = false;
2179 if (main_value)
2180 *reset_value |= 0x80808080U;
2181
2182 if (extra_value)
2183 *reset_value |= 0x40404040U;
2184 }
2185
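/* Fill one mip level's DCC metadata with the given clear code, in
 * either the separate DCC buffer or the DCC region inside the
 * texture resource.
 */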
2186 void vi_dcc_clear_level(struct r600_common_context *rctx,
2187 struct r600_texture *rtex,
2188 unsigned level, unsigned clear_value)
2189 {
2190 struct pipe_resource *dcc_buffer;
2191 uint64_t dcc_offset;
2192
2193 assert(rtex->dcc_offset && rtex->surface.level[level].dcc_enabled);
2194
2195 if (rtex->dcc_separate_buffer) {
2196 dcc_buffer = &rtex->dcc_separate_buffer->b.b;
2197 dcc_offset = 0;
2198 } else {
2199 dcc_buffer = &rtex->resource.b.b;
2200 dcc_offset = rtex->dcc_offset;
2201 }
2202
2203 dcc_offset += rtex->surface.level[level].dcc_offset;
2204
2205 rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset,
2206 rtex->surface.level[level].dcc_fast_clear_size,
2207 clear_value, R600_COHERENCY_CB_META);
2208 }
2209
2210 /* Set the same micro tile mode as the destination of the last MSAA resolve.
2211 * This allows hitting the MSAA resolve fast path, which requires that both
2212 * src and dst micro tile modes match.
2213 */
2214 static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
2215 struct r600_texture *rtex)
2216 {
2217 if (rtex->resource.is_shared ||
2218 rtex->surface.nsamples <= 1 ||
2219 rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
2220 return;
2221
2222 assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_2D);
2223 assert(rtex->surface.last_level == 0);
2224
2225 /* These magic numbers were copied from addrlib. It doesn't use any
2226 * definitions for them either. They are all 2D_TILED_THIN1 modes with
2227 * different bpp and micro tile mode.
2228 */
2229 if (rscreen->chip_class >= CIK) {
2230 switch (rtex->last_msaa_resolve_target_micro_mode) {
2231 case 0: /* displayable */
2232 rtex->surface.tiling_index[0] = 10;
2233 break;
2234 case 1: /* thin */
2235 rtex->surface.tiling_index[0] = 14;
2236 break;
2237 case 3: /* rotated */
2238 rtex->surface.tiling_index[0] = 28;
2239 break;
2240 default: /* depth, thick */
2241 assert(!"unexpected micro mode");
2242 return;
2243 }
2244 } else { /* SI */
2245 switch (rtex->last_msaa_resolve_target_micro_mode) {
2246 case 0: /* displayable */
2247 switch (rtex->surface.bpe) {
2248 case 8:
2249 rtex->surface.tiling_index[0] = 10;
2250 break;
2251 case 16:
2252 rtex->surface.tiling_index[0] = 11;
2253 break;
2254 default: /* 32, 64 */
2255 rtex->surface.tiling_index[0] = 12;
2256 break;
2257 }
2258 break;
2259 case 1: /* thin */
2260 switch (rtex->surface.bpe) {
2261 case 8:
2262 rtex->surface.tiling_index[0] = 14;
2263 break;
2264 case 16:
2265 rtex->surface.tiling_index[0] = 15;
2266 break;
2267 case 32:
2268 rtex->surface.tiling_index[0] = 16;
2269 break;
2270 default: /* 64, 128 */
2271 rtex->surface.tiling_index[0] = 17;
2272 break;
2273 }
2274 break;
2275 default: /* depth, thick */
2276 assert(!"unexpected micro mode");
2277 return;
2278 }
2279 }
2280
2281 rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
2282
2283 p_atomic_inc(&rscreen->dirty_fb_counter);
2284 p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
2285 }
2286
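/* For each bound colorbuffer that is about to be cleared, try a
 * metadata-only fast clear (DCC or CMASK), record the clear color,
 * and drop the buffer from *buffers so the caller skips the slow
 * clear for it.
 */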
2287 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
2288 struct pipe_framebuffer_state *fb,
2289 struct r600_atom *fb_state,
2290 unsigned *buffers, unsigned *dirty_cbufs,
2291 const union pipe_color_union *color)
2292 {
2293 int i;
2294
2295 /* This function is broken on big-endian, so just disable this path for now */
2296 #ifdef PIPE_ARCH_BIG_ENDIAN
2297 return;
2298 #endif
2299
2300 if (rctx->render_cond)
2301 return;
2302
2303 for (i = 0; i < fb->nr_cbufs; i++) {
2304 struct r600_texture *tex;
2305 unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
2306
2307 if (!fb->cbufs[i])
2308 continue;
2309
2310 /* if this colorbuffer is not being cleared */
2311 if (!(*buffers & clear_bit))
2312 continue;
2313
2314 tex = (struct r600_texture *)fb->cbufs[i]->texture;
2315
2316 /* 128-bit formats are unsupported */
2317 if (util_format_get_blocksizebits(fb->cbufs[i]->format) > 64) {
2318 continue;
2319 }
2320
2321 /* the clear is allowed if all layers are bound */
2322 if (fb->cbufs[i]->u.tex.first_layer != 0 ||
2323 fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) {
2324 continue;
2325 }
2326
2327 /* cannot clear mipmapped textures */
2328 if (fb->cbufs[i]->texture->last_level != 0) {
2329 continue;
2330 }
2331
2332 /* only supported on tiled surfaces */
2333 if (tex->surface.level[0].mode < RADEON_SURF_MODE_1D) {
2334 continue;
2335 }
2336
2337 /* shared textures can't use fast clear without an explicit flush,
2338 * because there is no way to communicate the clear color among
2339 * all clients
2340 */
2341 if (tex->resource.is_shared &&
2342 !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
2343 continue;
2344
2345 /* fast color clear with 1D tiling doesn't work on old kernels and CIK */
2346 if (tex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
2347 rctx->chip_class >= CIK &&
2348 rctx->screen->info.drm_major == 2 &&
2349 rctx->screen->info.drm_minor < 38) {
2350 continue;
2351 }
2352
2353 /* Fast clear is the most appropriate place to enable DCC for
2354 * displayable surfaces.
2355 */
2356 if (rctx->chip_class >= VI &&
2357 !(rctx->screen->debug_flags & DBG_NO_DCC_FB)) {
2358 vi_separate_dcc_try_enable(rctx, tex);
2359
2360 /* Stoney can't do a CMASK-based clear, so every clear counts
2361 * as a hypothetical slow clear, which is factored in when
2362 * deciding whether to enable separate DCC.
2363 */
2364 if (tex->dcc_gather_statistics &&
2365 rctx->family == CHIP_STONEY)
2366 tex->num_slow_clears++;
2367 }
2368
2369 /* Try to clear DCC first, otherwise try CMASK. */
2370 if (tex->dcc_offset && tex->surface.level[0].dcc_enabled) {
2371 uint32_t reset_value;
2372 bool clear_words_needed;
2373
2374 if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
2375 continue;
2376
2377 /* We can change the micro tile mode before a full clear. */
2378 if (rctx->screen->chip_class >= SI)
2379 si_set_optimal_micro_tile_mode(rctx->screen, tex);
2380
2381 vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
2382 vi_dcc_clear_level(rctx, tex, 0, reset_value);
2383
2384 if (clear_words_needed)
2385 tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
2386 tex->separate_dcc_dirty = true;
2387 } else {
2388 /* Stoney/RB+ doesn't work with CMASK fast clear. */
2389 if (rctx->family == CHIP_STONEY)
2390 continue;
2391
2392 /* ensure CMASK is enabled */
2393 r600_texture_alloc_cmask_separate(rctx->screen, tex);
2394 if (tex->cmask.size == 0) {
2395 continue;
2396 }
2397
2398 /* We can change the micro tile mode before a full clear. */
2399 if (rctx->screen->chip_class >= SI)
2400 si_set_optimal_micro_tile_mode(rctx->screen, tex);
2401
2402 /* Do the fast clear. */
2403 rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
2404 tex->cmask.offset, tex->cmask.size, 0,
2405 R600_COHERENCY_CB_META);
2406
2407 tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
2408 }
2409
2410 evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
2411
2412 if (dirty_cbufs)
2413 *dirty_cbufs |= 1 << i;
2414 rctx->set_atom_dirty(rctx, fb_state, true);
2415 *buffers &= ~clear_bit;
2416 }
2417 }
2418
2419 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
2420 {
2421 rscreen->b.resource_from_handle = r600_texture_from_handle;
2422 rscreen->b.resource_get_handle = r600_texture_get_handle;
2423 }
2424
2425 void r600_init_context_texture_functions(struct r600_common_context *rctx)
2426 {
2427 rctx->b.create_surface = r600_create_surface;
2428 rctx->b.surface_destroy = r600_surface_destroy;
2429 rctx->b.clear_texture = r600_clear_texture;
2430 }