r600g: add in-place DB decompression and texturing with DB tiling
author Marek Olšák <maraeo@gmail.com>
Tue, 2 Oct 2012 20:02:54 +0000 (22:02 +0200)
committer Marek Olšák <maraeo@gmail.com>
Tue, 6 Nov 2012 01:54:16 +0000 (02:54 +0100)
The decompression is done in-place and only the compressed tiles are
decompressed. Note: R6xx-R7xx can do that only with Z16 and Z32F.

The texture unit is programmed to use non-displayable tiling and depth
ordering of samples, so that it can fetch the texture in the native DB format.

The latest version of the libdrm surface allocator is required for stencil
texturing to work. The old one didn't create the mipmap tree correctly.
We need a separate mipmap tree for stencil, because the stencil mipmap
offsets are not really depth offsets/4.

There are still some known bugs, but this should save some memory and it also
improves performance a little bit in Lightsmark (especially with low
resolutions; tested with Radeon HD 5000).

The DB->CB copy is still used for transfers.

Reviewed-by: Jerome Glisse <jglisse@redhat.com>
src/gallium/auxiliary/util/u_blitter.c
src/gallium/drivers/r600/evergreen_compute_internal.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/evergreend.h
src/gallium/drivers/r600/r600_blit.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_resource.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_texture.c

index bb784d6b37021578f95d75f0c07c0f94b1d1f4d2..d10ce231c348644f843937123f3d73735aa38e89 100644 (file)
@@ -1609,7 +1609,8 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
    blitter_disable_render_cond(ctx);
 
    /* bind states */
-   pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA]);
+   pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA] :
+                                         ctx->blend[0]);
    pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);
    ctx->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0, FALSE));
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
index 3b1e5812d023ec358cd50a2566fb11fd653dbc54..7bc7fb43f6c37934836c032dd0b6a4a50cbb97ae 100644 (file)
@@ -478,7 +478,7 @@ void evergreen_set_tex_resource(
 
        unsigned format, endian;
        uint32_t word4 = 0, yuv_format = 0, pitch = 0;
-       unsigned char swizzle[4], array_mode = 0, tile_type = 0;
+       unsigned char swizzle[4], array_mode = 0, non_disp_tiling = 0;
        unsigned height, depth;
 
        swizzle[0] = 0;
@@ -501,7 +501,7 @@ void evergreen_set_tex_resource(
        pitch = align(tmp->surface.level[0].nblk_x *
                util_format_get_blockwidth(tmp->resource.b.b.format), 8);
        array_mode = tmp->array_mode[0];
-       tile_type = tmp->tile_type;
+       non_disp_tiling = tmp->non_disp_tiling;
 
        assert(view->base.texture->target != PIPE_TEXTURE_1D_ARRAY);
        assert(view->base.texture->target != PIPE_TEXTURE_2D_ARRAY);
@@ -511,7 +511,7 @@ void evergreen_set_tex_resource(
        evergreen_emit_raw_value(res,
                                (S_030000_DIM(r600_tex_dim(view->base.texture->target)) |
                                S_030000_PITCH((pitch / 8) - 1) |
-                               S_030000_NON_DISP_TILING_ORDER(tile_type) |
+                               S_030000_NON_DISP_TILING_ORDER(non_disp_tiling) |
                                S_030000_TEX_WIDTH(view->base.texture->width0 - 1)));
        evergreen_emit_raw_value(res, (S_030004_TEX_HEIGHT(height - 1) |
                                S_030004_TEX_DEPTH(depth - 1) |
index cf3c60fbfc115cea00d90d9975a70a6e95f25a0e..c105e55279f0a5238b60fffa14410ca22c1bb352 100644 (file)
@@ -979,9 +979,11 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
        struct r600_texture *tmp = (struct r600_texture*)texture;
        unsigned format, endian;
        uint32_t word4 = 0, yuv_format = 0, pitch = 0;
-       unsigned char swizzle[4], array_mode = 0, tile_type = 0;
+       unsigned char swizzle[4], array_mode = 0, non_disp_tiling = 0;
        unsigned height, depth, width;
        unsigned macro_aspect, tile_split, bankh, bankw, nbanks;
+       enum pipe_format pipe_format = state->format;
+       struct radeon_surface_level *surflevel;
 
        if (view == NULL)
                return NULL;
@@ -999,7 +1001,27 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
        swizzle[2] = state->swizzle_b;
        swizzle[3] = state->swizzle_a;
 
-       format = r600_translate_texformat(ctx->screen, state->format,
+       tile_split = tmp->surface.tile_split;
+       surflevel = tmp->surface.level;
+
+       /* Texturing with separate depth and stencil. */
+       if (tmp->is_depth && !tmp->is_flushing_texture) {
+               switch (pipe_format) {
+               case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+                       pipe_format = PIPE_FORMAT_Z32_FLOAT;
+                       break;
+               case PIPE_FORMAT_X24S8_UINT:
+               case PIPE_FORMAT_S8X24_UINT:
+               case PIPE_FORMAT_X32_S8X24_UINT:
+                       pipe_format = PIPE_FORMAT_S8_UINT;
+                       tile_split = tmp->surface.stencil_tile_split;
+                       surflevel = tmp->surface.stencil_level;
+                       break;
+               default:;
+               }
+       }
+
+       format = r600_translate_texformat(ctx->screen, pipe_format,
                                          swizzle,
                                          &word4, &yuv_format);
        assert(format != ~0);
@@ -1008,23 +1030,15 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
                return NULL;
        }
 
-       if (tmp->is_depth && !tmp->is_flushing_texture) {
-               if (!r600_init_flushed_depth_texture(ctx, texture, NULL)) {
-                       FREE(view);
-                       return NULL;
-               }
-               tmp = tmp->flushed_depth_texture;
-       }
-
        endian = r600_colorformat_endian_swap(format);
 
        width = width0;
        height = height0;
-       depth = tmp->surface.level[0].npix_z;
-       pitch = tmp->surface.level[0].nblk_x * util_format_get_blockwidth(state->format);
-       tile_type = tmp->tile_type;
+       depth = surflevel[0].npix_z;
+       pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);
+       non_disp_tiling = tmp->non_disp_tiling;
 
-       switch (tmp->surface.level[0].mode) {
+       switch (surflevel[0].mode) {
        case RADEON_SURF_MODE_LINEAR_ALIGNED:
                array_mode = V_028C70_ARRAY_LINEAR_ALIGNED;
                break;
@@ -1039,7 +1053,6 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
                array_mode = V_028C70_ARRAY_LINEAR_GENERAL;
                break;
        }
-       tile_split = tmp->surface.tile_split;
        macro_aspect = tmp->surface.mtilea;
        bankw = tmp->surface.bankw;
        bankh = tmp->surface.bankh;
@@ -1050,8 +1063,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 
        /* 128 bit formats require tile type = 1 */
        if (rscreen->chip_class == CAYMAN) {
-               if (util_format_get_blocksize(state->format) >= 16)
-                       tile_type = 1;
+               if (util_format_get_blocksize(pipe_format) >= 16)
+                       non_disp_tiling = 1;
        }
        nbanks = eg_num_banks(rscreen->tiling_info.num_banks);
 
@@ -1067,13 +1080,13 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
                                       S_030000_PITCH((pitch / 8) - 1) |
                                       S_030000_TEX_WIDTH(width - 1));
        if (rscreen->chip_class == CAYMAN)
-               view->tex_resource_words[0] |= CM_S_030000_NON_DISP_TILING_ORDER(tile_type);
+               view->tex_resource_words[0] |= CM_S_030000_NON_DISP_TILING_ORDER(non_disp_tiling);
        else
-               view->tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(tile_type);
+               view->tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(non_disp_tiling);
        view->tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) |
                                       S_030004_TEX_DEPTH(depth - 1) |
                                       S_030004_ARRAY_MODE(array_mode));
-       view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
+       view->tex_resource_words[2] = (surflevel[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
 
        /* TEX_RESOURCE_WORD3.MIP_ADDRESS */
        if (texture->nr_samples > 1 && rscreen->msaa_texture_support == MSAA_TEXTURE_COMPRESSED) {
@@ -1087,9 +1100,9 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
                        view->tex_resource_words[3] = (tmp->fmask_offset + r600_resource_va(ctx->screen, texture)) >> 8;
                }
        } else if (state->u.tex.last_level && texture->nr_samples <= 1) {
-               view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8;
+               view->tex_resource_words[3] = (surflevel[1].offset + r600_resource_va(ctx->screen, texture)) >> 8;
        } else {
-               view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
+               view->tex_resource_words[3] = (surflevel[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
        }
 
        view->tex_resource_words[4] = (word4 |
@@ -1116,7 +1129,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
                                      S_03001C_BANK_WIDTH(bankw) |
                                      S_03001C_BANK_HEIGHT(bankh) |
                                      S_03001C_MACRO_TILE_ASPECT(macro_aspect) |
-                                     S_03001C_NUM_BANKS(nbanks);
+                                     S_03001C_NUM_BANKS(nbanks) |
+                                     S_03001C_DEPTH_SAMPLE_ORDER(tmp->is_depth && !tmp->is_flushing_texture);
        return &view->base;
 }
 
@@ -1253,17 +1267,11 @@ void evergreen_init_color_surface(struct r600_context *rctx,
        unsigned color_info, color_attrib, color_dim = 0;
        unsigned format, swap, ntype, endian;
        uint64_t offset, base_offset;
-       unsigned tile_type, macro_aspect, tile_split, bankh, bankw, fmask_bankh, nbanks;
+       unsigned non_disp_tiling, macro_aspect, tile_split, bankh, bankw, fmask_bankh, nbanks;
        const struct util_format_description *desc;
        int i;
        bool blend_clamp = 0, blend_bypass = 0;
 
-       if (rtex->is_depth && !rtex->is_flushing_texture) {
-               r600_init_flushed_depth_texture(&rctx->context, pipe_tex, NULL);
-               rtex = rtex->flushed_depth_texture;
-               assert(rtex);
-       }
-
        offset = rtex->surface.level[level].offset;
        if (rtex->surface.level[level].mode < RADEON_SURF_MODE_1D) {
                offset += rtex->surface.level[level].slice_size *
@@ -1278,20 +1286,20 @@ void evergreen_init_color_surface(struct r600_context *rctx,
        switch (rtex->surface.level[level].mode) {
        case RADEON_SURF_MODE_LINEAR_ALIGNED:
                color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED);
-               tile_type = 1;
+               non_disp_tiling = 1;
                break;
        case RADEON_SURF_MODE_1D:
                color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_1D_TILED_THIN1);
-               tile_type = rtex->tile_type;
+               non_disp_tiling = rtex->non_disp_tiling;
                break;
        case RADEON_SURF_MODE_2D:
                color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_2D_TILED_THIN1);
-               tile_type = rtex->tile_type;
+               non_disp_tiling = rtex->non_disp_tiling;
                break;
        case RADEON_SURF_MODE_LINEAR:
        default:
                color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_GENERAL);
-               tile_type = 1;
+               non_disp_tiling = 1;
                break;
        }
        tile_split = rtex->surface.tile_split;
@@ -1308,7 +1316,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
        /* 128 bit formats require tile type = 1 */
        if (rscreen->chip_class == CAYMAN) {
                if (util_format_get_blocksize(surf->base.format) >= 16)
-                       tile_type = 1;
+                       non_disp_tiling = 1;
        }
        nbanks = eg_num_banks(rscreen->tiling_info.num_banks);
        desc = util_format_description(surf->base.format);
@@ -1323,7 +1331,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
                        S_028C74_BANK_WIDTH(bankw) |
                        S_028C74_BANK_HEIGHT(bankh) |
                        S_028C74_MACRO_TILE_ASPECT(macro_aspect) |
-                       S_028C74_NON_DISP_TILING_ORDER(tile_type) |
+                       S_028C74_NON_DISP_TILING_ORDER(non_disp_tiling) |
                        S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
 
        if (rctx->chip_class == CAYMAN && rtex->resource.b.b.nr_samples > 1) {
@@ -1515,26 +1523,15 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
        }
 
        if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
-               uint64_t stencil_offset = rtex->surface.stencil_offset;
-               unsigned i, stile_split = rtex->surface.stencil_tile_split;
+               uint64_t stencil_offset;
+               unsigned stile_split = rtex->surface.stencil_tile_split;
 
                stile_split = eg_tile_split(stile_split);
+
+               stencil_offset = rtex->surface.stencil_level[level].offset;
                stencil_offset += r600_resource_va(screen, surf->base.texture);
-               stencil_offset += rtex->surface.level[level].offset / 4;
-               stencil_offset >>= 8;
-
-               /* We're guessing the stencil offset from the depth offset.
-                * Make sure each mipmap level has a unique offset. */
-               for (i = 1; i <= level; i++) {
-                       /* If two levels have the same address, add 256
-                        * to the offset of the smaller level. */
-                       if ((rtex->surface.level[i-1].offset / 4) >> 8 ==
-                           (rtex->surface.level[i].offset / 4) >> 8) {
-                               stencil_offset++;
-                       }
-               }
 
-               surf->db_stencil_base = stencil_offset;
+               surf->db_stencil_base = stencil_offset >> 8;
                surf->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8) |
                                        S_028044_TILE_SPLIT(stile_split);
        } else {
@@ -2108,6 +2105,10 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
                                     S_028000_STENCIL_COPY_ENABLE(a->copy_stencil) |
                                     S_028000_COPY_CENTROID(1) |
                                     S_028000_COPY_SAMPLE(a->copy_sample);
+       } else if (a->flush_depthstencil_in_place) {
+               db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(1) |
+                                    S_028000_STENCIL_COMPRESS_DISABLE(1);
+               db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1);
        }
 
        r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
index 05206912374e3232f8c06260b30b4be2d1defdd2..c91b2d820d29282c054da3c1db374cfa652839df 100644 (file)
 #define   C_030018_INTERLACED                          0xFFFFFFBF
 #define   S_030018_TILE_SPLIT(x)                       (((x) & 0x7) << 29)
 #define R_03001C_SQ_TEX_RESOURCE_WORD7_0             0x03001C
+#define   S_03001C_DATA_FORMAT(x)                      (((x) & 0x3F) << 0)
+#define   G_03001C_DATA_FORMAT(x)                      (((x) >> 0) & 0x3F)
+#define   C_03001C_DATA_FORMAT                         0xFFFFFFC0
 #define   S_03001C_MACRO_TILE_ASPECT(x)                (((x) & 0x3) << 6)
 #define   S_03001C_BANK_WIDTH(x)                       (((x) & 0x3) << 8)
 #define   S_03001C_BANK_HEIGHT(x)                      (((x) & 0x3) << 10)
+#define   S_03001C_DEPTH_SAMPLE_ORDER(x)               (((x) & 0x1) << 15)
 #define   S_03001C_NUM_BANKS(x)                        (((x) & 0x3) << 16)
 #define   S_03001C_TYPE(x)                             (((x) & 0x3) << 30)
 #define   G_03001C_TYPE(x)                             (((x) >> 30) & 0x3)
 #define     V_03001C_SQ_TEX_VTX_INVALID_BUFFER         0x00000001
 #define     V_03001C_SQ_TEX_VTX_VALID_TEXTURE          0x00000002
 #define     V_03001C_SQ_TEX_VTX_VALID_BUFFER           0x00000003
-#define   S_03001C_DATA_FORMAT(x)                      (((x) & 0x3F) << 0)
-#define   G_03001C_DATA_FORMAT(x)                      (((x) >> 0) & 0x3F)
-#define   C_03001C_DATA_FORMAT                         0xFFFFFFC0
 
 #define R_030008_SQ_VTX_CONSTANT_WORD2_0             0x030008
 #define   S_030008_BASE_ADDRESS_HI(x)                  (((x) & 0xFF) << 0)
 #define   S_02800C_IGNORE_SC_ZRANGE(x)                 (((x) & 0x1) << 17)
 #define   G_02800C_IGNORE_SC_ZRANGE(x)                 (((x) >> 17) & 0x1)
 #define   C_02800C_IGNORE_SC_ZRANGE                    0xFFFDFFFF
+/* DISABLE_PIXEL_RATE_TILES occupies bit 26: S_ sets it, G_ extracts it and
+ * C_ is the mask that clears only that bit. */
+#define   S_02800C_DISABLE_PIXEL_RATE_TILES(x)         (((x) & 0x1) << 26)
+#define   G_02800C_DISABLE_PIXEL_RATE_TILES(x)         (((x) >> 26) & 0x1)
+#define   C_02800C_DISABLE_PIXEL_RATE_TILES            0xFBFFFFFF
 #define R_028010_DB_RENDER_OVERRIDE2                 0x00028010
 #define R_028014_DB_HTILE_DATA_BASE                  0x00028014
 #define R_028028_DB_STENCIL_CLEAR                    0x00028028
index 085108d37095d03f14c9231161a210258e9ae112..a2ed17723f140005c62bc1084c05bb385f88d876 100644 (file)
@@ -159,7 +159,6 @@ void r600_blit_decompress_depth(struct pipe_context *ctx,
        rctx->db_misc_state.copy_sample = first_sample;
        rctx->db_misc_state.atom.dirty = true;
 
-
        for (level = first_level; level <= last_level; level++) {
                if (!staging && !(texture->dirty_level_mask & (1 << level)))
                        continue;
@@ -218,6 +217,58 @@ void r600_blit_decompress_depth(struct pipe_context *ctx,
        rctx->db_misc_state.atom.dirty = true;
 }
 
+/**
+ * Decompress a range of levels and layers of a depth-stencil texture in place.
+ *
+ * For every level in [first_level, last_level] whose bit is set in
+ * texture->dirty_level_mask, each layer in [first_layer, last_layer]
+ * (clamped to the highest layer that exists at that level) is bound as the
+ * depth-stencil surface and run through util_blitter_custom_depth_stencil()
+ * without a color surface, while db_misc_state.flush_depthstencil_in_place
+ * is set.
+ *
+ * The dirty bit of a level is cleared only if all of its layers were
+ * processed by this call.
+ */
+static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
+                                                struct r600_texture *texture,
+                                                unsigned first_level, unsigned last_level,
+                                                unsigned first_layer, unsigned last_layer)
+{
+       struct pipe_surface *zsurf, surf_tmpl = {{0}};
+       unsigned layer, max_layer, checked_last_layer, level;
+
+       /* Enable decompression in DB_RENDER_CONTROL */
+       rctx->db_misc_state.flush_depthstencil_in_place = true;
+       rctx->db_misc_state.atom.dirty = true;
+
+       surf_tmpl.format = texture->resource.b.b.format;
+       surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
+
+       for (level = first_level; level <= last_level; level++) {
+               /* Skip levels that are not marked dirty. */
+               if (!(texture->dirty_level_mask & (1 << level)))
+                       continue;
+
+               surf_tmpl.u.tex.level = level;
+
+               /* The smaller the mipmap level, the fewer layers there are
+                * as far as 3D textures are concerned. */
+               max_layer = u_max_layer(&texture->resource.b.b, level);
+               checked_last_layer = last_layer < max_layer ? last_layer : max_layer;
+
+               for (layer = first_layer; layer <= checked_last_layer; layer++) {
+                       surf_tmpl.u.tex.first_layer = layer;
+                       surf_tmpl.u.tex.last_layer = layer;
+
+                       zsurf = rctx->context.create_surface(&rctx->context, &texture->resource.b.b, &surf_tmpl);
+
+                       r600_blitter_begin(&rctx->context, R600_DECOMPRESS);
+                       util_blitter_custom_depth_stencil(rctx->blitter, zsurf, NULL, ~0,
+                                                         rctx->custom_dsa_flush, 1.0f);
+                       r600_blitter_end(&rctx->context);
+
+                       pipe_surface_reference(&zsurf, NULL);
+               }
+
+               /* The level stays dirty if some of its layers weren't flushed.
+                * Compare against the clamped last layer: callers may pass a
+                * last_layer taken from a larger level, which exceeds max_layer
+                * here even though every existing layer has been processed. */
+               if (first_layer == 0 && checked_last_layer == max_layer) {
+                       texture->dirty_level_mask &= ~(1 << level);
+               }
+       }
+
+       /* Disable decompression in DB_RENDER_CONTROL */
+       rctx->db_misc_state.flush_depthstencil_in_place = false;
+       rctx->db_misc_state.atom.dirty = true;
+}
+
+
 void r600_decompress_depth_textures(struct r600_context *rctx,
                               struct r600_samplerview_state *textures)
 {
@@ -236,10 +287,17 @@ void r600_decompress_depth_textures(struct r600_context *rctx,
                tex = (struct r600_texture *)view->texture;
                assert(tex->is_depth && !tex->is_flushing_texture);
 
-               r600_blit_decompress_depth(&rctx->context, tex, NULL,
-                                          view->u.tex.first_level, view->u.tex.last_level,
-                                          0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level),
-                                          0, u_max_sample(&tex->resource.b.b));
+               if (rctx->chip_class >= EVERGREEN ||
+                   r600_can_read_depth(tex)) {
+                       r600_blit_decompress_depth_in_place(rctx, tex,
+                                                  view->u.tex.first_level, view->u.tex.last_level,
+                                                  0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level));
+               } else {
+                       r600_blit_decompress_depth(&rctx->context, tex, NULL,
+                                                  view->u.tex.first_level, view->u.tex.last_level,
+                                                  0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level),
+                                                  0, u_max_sample(&tex->resource.b.b));
+               }
        }
 }
 
@@ -299,7 +357,7 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
                        pipe_surface_reference(&cbsurf, NULL);
                }
 
-               /* The texture will always be dirty if some layers or samples aren't flushed.
+               /* The texture will always be dirty if some layers aren't flushed.
                 * I don't think this case occurs often though. */
                if (first_layer == 0 && last_layer == max_layer) {
                        rtex->dirty_level_mask &= ~(1 << level);
@@ -340,16 +398,24 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
                                        unsigned level,
                                        unsigned first_layer, unsigned last_layer)
 {
+       struct r600_context *rctx = (struct r600_context *)ctx;
        struct r600_texture *rtex = (struct r600_texture*)tex;
 
        if (rtex->is_depth && !rtex->is_flushing_texture) {
-               if (!r600_init_flushed_depth_texture(ctx, tex, NULL))
-                       return false; /* error */
+               if (rctx->chip_class >= EVERGREEN ||
+                   r600_can_read_depth(rtex)) {
+                       r600_blit_decompress_depth_in_place(rctx, rtex,
+                                                  level, level,
+                                                  first_layer, last_layer);
+               } else {
+                       if (!r600_init_flushed_depth_texture(ctx, tex, NULL))
+                               return false; /* error */
 
-               r600_blit_decompress_depth(ctx, rtex, NULL,
-                                          level, level,
-                                          first_layer, last_layer,
-                                          0, u_max_sample(tex));
+                       r600_blit_decompress_depth(ctx, rtex, NULL,
+                                                  level, level,
+                                                  first_layer, last_layer,
+                                                  0, u_max_sample(tex));
+               }
        } else if (rtex->fmask_size && rtex->cmask_size) {
                r600_blit_decompress_color(ctx, rtex, level, level,
                                           first_layer, last_layer);
index 342ab87052dcec6e577964cc53d6a7369dc61425..7f1902564ccce533d84dc8b9e8bc814321325109 100644 (file)
@@ -71,6 +71,7 @@ struct r600_db_misc_state {
        struct r600_atom atom;
        bool occlusion_query_enabled;
        bool flush_depthstencil_through_cb;
+       bool flush_depthstencil_in_place;
        bool copy_depth, copy_stencil;
        unsigned copy_sample;
        unsigned log_samples;
index 7ebf59e23a5b51de59faa6dbd31d514362101c8c..007d5e08d3594b8ed20e3f201957971efebc99f6 100644 (file)
@@ -48,7 +48,7 @@ struct r600_texture {
        unsigned                        array_mode[PIPE_MAX_TEXTURE_LEVELS];
        unsigned                        pitch_override;
        unsigned                        size;
-       unsigned                        tile_type;
+       bool                            non_disp_tiling;
        bool                            is_depth;
        bool                            is_rat;
        unsigned                        dirty_level_mask; /* each bit says if that mipmap is compressed */
@@ -115,6 +115,14 @@ struct r600_surface {
        unsigned pa_su_poly_offset_db_fmt_cntl;
 };
 
+/* Tell whether the depth format can be read without the DB->CB copy on
+ * r6xx-r7xx: true only for resources with at most one sample whose format
+ * is Z16_UNORM or Z32_FLOAT. */
+static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
+{
+       const struct pipe_resource *res = &rtex->resource.b.b;
+
+       /* Multisampled resources never qualify. */
+       if (res->nr_samples > 1)
+               return false;
+
+       switch (res->format) {
+       case PIPE_FORMAT_Z16_UNORM:
+       case PIPE_FORMAT_Z32_FLOAT:
+               return true;
+       default:
+               return false;
+       }
+}
+
 void r600_resource_destroy(struct pipe_screen *screen, struct pipe_resource *res);
 void r600_init_screen_resource_functions(struct pipe_screen *screen);
 
index 5c52f3d518eb0f78cc533f33f669db54f95569a9..a7b602ddf7a451ca98d602fca6e285a55bf7edfd 100644 (file)
@@ -985,7 +985,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
        struct r600_texture *tmp = (struct r600_texture*)texture;
        unsigned format, endian;
        uint32_t word4 = 0, yuv_format = 0, pitch = 0;
-       unsigned char swizzle[4], array_mode = 0, tile_type = 0;
+       unsigned char swizzle[4], array_mode = 0;
        unsigned width, height, depth, offset_level, last_level;
 
        if (view == NULL)
@@ -1013,7 +1013,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
                return NULL;
        }
 
-       if (tmp->is_depth && !tmp->is_flushing_texture) {
+       if (tmp->is_depth && !tmp->is_flushing_texture && !r600_can_read_depth(tmp)) {
                if (!r600_init_flushed_depth_texture(ctx, texture, NULL)) {
                        FREE(view);
                        return NULL;
@@ -1029,7 +1029,6 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
        height = height_first_level;
        depth = tmp->surface.level[offset_level].npix_z;
        pitch = tmp->surface.level[offset_level].nblk_x * util_format_get_blockwidth(state->format);
-       tile_type = tmp->tile_type;
 
        if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
                height = 1;
@@ -1056,7 +1055,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
        view->tex_resource = &tmp->resource;
        view->tex_resource_words[0] = (S_038000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) |
                                       S_038000_TILE_MODE(array_mode) |
-                                      S_038000_TILE_TYPE(tile_type) |
+                                      S_038000_TILE_TYPE(tmp->non_disp_tiling) |
                                       S_038000_PITCH((pitch / 8) - 1) |
                                       S_038000_TEX_WIDTH(width - 1));
        view->tex_resource_words[1] = (S_038004_TEX_HEIGHT(height - 1) |
@@ -1178,7 +1177,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
        int i;
        bool blend_bypass = 0, blend_clamp = 1;
 
-       if (rtex->is_depth && !rtex->is_flushing_texture) {
+       if (rtex->is_depth && !rtex->is_flushing_texture && !r600_can_read_depth(rtex)) {
                r600_init_flushed_depth_texture(&rctx->context, surf->base.texture, NULL);
                rtex = rtex->flushed_depth_texture;
                assert(rtex);
@@ -1849,6 +1848,10 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
                                     S_028D0C_STENCIL_COPY_ENABLE(a->copy_stencil) |
                                     S_028D0C_COPY_CENTROID(1) |
                                     S_028D0C_COPY_SAMPLE(a->copy_sample);
+       } else if (a->flush_depthstencil_in_place) {
+               db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(1) |
+                                    S_028D0C_STENCIL_COMPRESS_DISABLE(1);
+               db_render_override |= S_028D10_NOOP_CULL_DISABLE(1);
        }
 
        r600_write_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
index 2df390d50510c9efaf8e6a95c390e138cc72a5f2..75e7f870efbf87a9107fe4d3c6ed1026082ff34d 100644 (file)
@@ -151,7 +151,8 @@ static int r600_init_surface(struct r600_screen *rscreen,
                surface->flags |= RADEON_SURF_ZBUFFER;
 
                if (is_stencil) {
-                       surface->flags |= RADEON_SURF_SBUFFER;
+                       surface->flags |= RADEON_SURF_SBUFFER |
+                                          RADEON_SURF_HAS_SBUFFER_MIPTREE;
                }
        }
        return 0;
@@ -179,7 +180,8 @@ static int r600_setup_surface(struct pipe_screen *screen,
                rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override;
                rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y;
                if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
-                       rtex->surface.stencil_offset = rtex->surface.level[0].slice_size;
+                       rtex->surface.stencil_offset =
+                       rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size;
                }
        }
        for (i = 0; i <= ptex->last_level; i++) {
@@ -365,6 +367,8 @@ static void r600_texture_allocate_cmask(struct r600_screen *rscreen,
 #endif
 }
 
+DEBUG_GET_ONCE_BOOL_OPTION(print_texdepth, "R600_PRINT_TEXDEPTH", FALSE);
+
 static struct r600_texture *
 r600_texture_create_object(struct pipe_screen *screen,
                           const struct pipe_resource *base,
@@ -411,6 +415,9 @@ r600_texture_create_object(struct pipe_screen *screen,
                return NULL;
        }
 
+       /* Tiled depth textures utilize the non-displayable tile order. */
+       rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
+
        /* Now create the backing buffer. */
        if (!buf && alloc_bo) {
                unsigned base_align = rtex->surface.bo_alignment;
@@ -433,6 +440,52 @@ r600_texture_create_object(struct pipe_screen *screen,
                memset(ptr + rtex->cmask_offset, 0xCC, rtex->cmask_size);
                rscreen->ws->buffer_unmap(resource->cs_buf);
        }
+
+       if (debug_get_option_print_texdepth() && rtex->is_depth && rtex->non_disp_tiling) {
+               printf("Texture: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
+                      "blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
+                      "bpe=%u, nsamples=%u, flags=%u\n",
+                      rtex->surface.npix_x, rtex->surface.npix_y,
+                      rtex->surface.npix_z, rtex->surface.blk_w,
+                      rtex->surface.blk_h, rtex->surface.blk_d,
+                      rtex->surface.array_size, rtex->surface.last_level,
+                      rtex->surface.bpe, rtex->surface.nsamples,
+                      rtex->surface.flags);
+               if (rtex->surface.flags & RADEON_SURF_ZBUFFER) {
+                       for (int i = 0; i <= rtex->surface.last_level; i++) {
+                               printf("  Z %i: offset=%llu, slice_size=%llu, npix_x=%u, "
+                                      "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
+                                      "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
+                                      i, rtex->surface.level[i].offset,
+                                      rtex->surface.level[i].slice_size,
+                                      rtex->surface.level[i].npix_x,
+                                      rtex->surface.level[i].npix_y,
+                                      rtex->surface.level[i].npix_z,
+                                      rtex->surface.level[i].nblk_x,
+                                      rtex->surface.level[i].nblk_y,
+                                      rtex->surface.level[i].nblk_z,
+                                      rtex->surface.level[i].pitch_bytes,
+                                      rtex->surface.level[i].mode);
+                       }
+               }
+               if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
+                       for (int i = 0; i <= rtex->surface.last_level; i++) {
+                               printf("  S %i: offset=%llu, slice_size=%llu, npix_x=%u, "
+                                      "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
+                                      "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
+                                      i, rtex->surface.stencil_level[i].offset,
+                                      rtex->surface.stencil_level[i].slice_size,
+                                      rtex->surface.stencil_level[i].npix_x,
+                                      rtex->surface.stencil_level[i].npix_y,
+                                      rtex->surface.stencil_level[i].npix_z,
+                                      rtex->surface.stencil_level[i].nblk_x,
+                                      rtex->surface.stencil_level[i].nblk_y,
+                                      rtex->surface.stencil_level[i].nblk_z,
+                                      rtex->surface.stencil_level[i].pitch_bytes,
+                                      rtex->surface.stencil_level[i].mode);
+                       }
+               }
+       }
        return rtex;
 }
 
@@ -591,6 +644,7 @@ bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
        }
 
        (*flushed_depth_texture)->is_flushing_texture = TRUE;
+       (*flushed_depth_texture)->non_disp_tiling = false;
        return true;
 }
 
@@ -845,7 +899,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
 
        /* Colorspace (return non-RGB formats directly). */
        switch (desc->colorspace) {
-               /* Depth stencil formats */
+       /* Depth stencil formats */
        case UTIL_FORMAT_COLORSPACE_ZS:
                switch (format) {
                case PIPE_FORMAT_Z16_UNORM: