From 428e37c2da420f7dc14a2ea265f2387270f9bee1 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 2 Oct 2012 22:02:54 +0200 Subject: [PATCH] r600g: add in-place DB decompression and texturing with DB tiling The decompression is done in-place and only the compressed tiles are decompressed. Note: R6xx-R7xx can do that only with Z16 and Z32F. The texture unit is programmed to use non-displayable tiling and depth ordering of samples, so that it can fetch the texture in the native DB format. The latest version of the libdrm surface allocator is required for stencil texturing to work. The old one didn't create the mipmap tree correctly. We need a separate mipmap tree for stencil, because the stencil mipmap offsets are not really depth offsets/4. There are still some known bugs, but this should save some memory and it also improves performance a little bit in Lightsmark (especially with low resolutions; tested with Radeon HD 5000). The DB->CB copy is still used for transfers. Reviewed-by: Jerome Glisse --- src/gallium/auxiliary/util/u_blitter.c | 3 +- .../drivers/r600/evergreen_compute_internal.c | 6 +- src/gallium/drivers/r600/evergreen_state.c | 105 +++++++++--------- src/gallium/drivers/r600/evergreend.h | 10 +- src/gallium/drivers/r600/r600_blit.c | 90 +++++++++++++-- src/gallium/drivers/r600/r600_pipe.h | 1 + src/gallium/drivers/r600/r600_resource.h | 10 +- src/gallium/drivers/r600/r600_state.c | 13 ++- src/gallium/drivers/r600/r600_texture.c | 60 +++++++++- 9 files changed, 218 insertions(+), 80 deletions(-) diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index bb784d6b370..d10ce231c34 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -1609,7 +1609,8 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter, blitter_disable_render_cond(ctx); /* bind states */ - pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA]); + pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA] : + ctx->blend[0]); pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage); ctx->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0, FALSE)); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c index 3b1e5812d02..7bc7fb43f6c 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.c +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c @@ -478,7 +478,7 @@ void evergreen_set_tex_resource( unsigned format, endian; uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4], array_mode = 0, tile_type = 0; + unsigned char swizzle[4], array_mode = 0, non_disp_tiling = 0; unsigned height, depth; swizzle[0] = 0; @@ -501,7 +501,7 @@ void evergreen_set_tex_resource( pitch = align(tmp->surface.level[0].nblk_x * util_format_get_blockwidth(tmp->resource.b.b.format), 8); array_mode = tmp->array_mode[0]; - tile_type = tmp->tile_type; + non_disp_tiling = tmp->non_disp_tiling; assert(view->base.texture->target != PIPE_TEXTURE_1D_ARRAY); assert(view->base.texture->target != PIPE_TEXTURE_2D_ARRAY); @@ -511,7 +511,7 @@ void evergreen_set_tex_resource( evergreen_emit_raw_value(res, (S_030000_DIM(r600_tex_dim(view->base.texture->target)) | S_030000_PITCH((pitch / 8) - 1) | - S_030000_NON_DISP_TILING_ORDER(tile_type) | + S_030000_NON_DISP_TILING_ORDER(non_disp_tiling) | S_030000_TEX_WIDTH(view->base.texture->width0 - 1))); evergreen_emit_raw_value(res, (S_030004_TEX_HEIGHT(height - 1) | S_030004_TEX_DEPTH(depth - 1) | diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index cf3c60fbfc1..c105e55279f 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -979,9 +979,11 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, struct r600_texture *tmp = (struct r600_texture*)texture; unsigned format, endian; uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4], array_mode = 0, tile_type = 0; + unsigned char swizzle[4], array_mode = 0, non_disp_tiling = 0; unsigned height, depth, width; unsigned macro_aspect, tile_split, bankh, bankw, nbanks; + enum pipe_format pipe_format = state->format; + struct radeon_surface_level *surflevel; if (view == NULL) return NULL; @@ -999,7 +1001,27 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(ctx->screen, state->format, + tile_split = tmp->surface.tile_split; + surflevel = tmp->surface.level; + + /* Texturing with separate depth and stencil. */ + if (tmp->is_depth && !tmp->is_flushing_texture) { + switch (pipe_format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + pipe_format = PIPE_FORMAT_Z32_FLOAT; + break; + case PIPE_FORMAT_X24S8_UINT: + case PIPE_FORMAT_S8X24_UINT: + case PIPE_FORMAT_X32_S8X24_UINT: + pipe_format = PIPE_FORMAT_S8_UINT; + tile_split = tmp->surface.stencil_tile_split; + surflevel = tmp->surface.stencil_level; + break; + default:; + } + } + + format = r600_translate_texformat(ctx->screen, pipe_format, swizzle, &word4, &yuv_format); assert(format != ~0); @@ -1008,23 +1030,15 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, return NULL; } - if (tmp->is_depth && !tmp->is_flushing_texture) { - if (!r600_init_flushed_depth_texture(ctx, texture, NULL)) { - FREE(view); - return NULL; - } - tmp = tmp->flushed_depth_texture; - } - endian = r600_colorformat_endian_swap(format); width = width0; height = height0; - depth = tmp->surface.level[0].npix_z; - pitch = tmp->surface.level[0].nblk_x * util_format_get_blockwidth(state->format); - tile_type = tmp->tile_type; + depth = surflevel[0].npix_z; + pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format); + non_disp_tiling = tmp->non_disp_tiling; - switch (tmp->surface.level[0].mode) { + switch (surflevel[0].mode) { case RADEON_SURF_MODE_LINEAR_ALIGNED: array_mode = V_028C70_ARRAY_LINEAR_ALIGNED; break; @@ -1039,7 +1053,6 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, array_mode = V_028C70_ARRAY_LINEAR_GENERAL; break; } - tile_split = tmp->surface.tile_split; macro_aspect = tmp->surface.mtilea; bankw = tmp->surface.bankw; bankh = tmp->surface.bankh; @@ -1050,8 +1063,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, /* 128 bit formats require tile type = 1 */ if (rscreen->chip_class == CAYMAN) { - if (util_format_get_blocksize(state->format) >= 16) - tile_type = 1; + if (util_format_get_blocksize(pipe_format) >= 16) + non_disp_tiling = 1; } nbanks = eg_num_banks(rscreen->tiling_info.num_banks); @@ -1067,13 +1080,13 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, S_030000_PITCH((pitch / 8) - 1) | S_030000_TEX_WIDTH(width - 1)); if (rscreen->chip_class == CAYMAN) - view->tex_resource_words[0] |= CM_S_030000_NON_DISP_TILING_ORDER(tile_type); + view->tex_resource_words[0] |= CM_S_030000_NON_DISP_TILING_ORDER(non_disp_tiling); else - view->tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(tile_type); + view->tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(non_disp_tiling); view->tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) | S_030004_TEX_DEPTH(depth - 1) | S_030004_ARRAY_MODE(array_mode)); - view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8; + view->tex_resource_words[2] = (surflevel[0].offset + r600_resource_va(ctx->screen, texture)) >> 8; /* TEX_RESOURCE_WORD3.MIP_ADDRESS */ if (texture->nr_samples > 1 && rscreen->msaa_texture_support == MSAA_TEXTURE_COMPRESSED) { @@ -1087,9 +1100,9 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, view->tex_resource_words[3] = (tmp->fmask_offset + r600_resource_va(ctx->screen, texture)) >> 8; } } else if (state->u.tex.last_level && texture->nr_samples <= 1) { - view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8; + view->tex_resource_words[3] = (surflevel[1].offset + r600_resource_va(ctx->screen, texture)) >> 8; } else { - view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8; + view->tex_resource_words[3] = (surflevel[0].offset + r600_resource_va(ctx->screen, texture)) >> 8; } view->tex_resource_words[4] = (word4 | @@ -1116,7 +1129,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, S_03001C_BANK_WIDTH(bankw) | S_03001C_BANK_HEIGHT(bankh) | S_03001C_MACRO_TILE_ASPECT(macro_aspect) | - S_03001C_NUM_BANKS(nbanks); + S_03001C_NUM_BANKS(nbanks) | + S_03001C_DEPTH_SAMPLE_ORDER(tmp->is_depth && !tmp->is_flushing_texture); return &view->base; } @@ -1253,17 +1267,11 @@ void evergreen_init_color_surface(struct r600_context *rctx, unsigned color_info, color_attrib, color_dim = 0; unsigned format, swap, ntype, endian; uint64_t offset, base_offset; - unsigned tile_type, macro_aspect, tile_split, bankh, bankw, fmask_bankh, nbanks; + unsigned non_disp_tiling, macro_aspect, tile_split, bankh, bankw, fmask_bankh, nbanks; const struct util_format_description *desc; int i; bool blend_clamp = 0, blend_bypass = 0; - if (rtex->is_depth && !rtex->is_flushing_texture) { - r600_init_flushed_depth_texture(&rctx->context, pipe_tex, NULL); - rtex = rtex->flushed_depth_texture; - assert(rtex); - } - offset = rtex->surface.level[level].offset; if (rtex->surface.level[level].mode < RADEON_SURF_MODE_1D) { offset += rtex->surface.level[level].slice_size * @@ -1278,20 +1286,20 @@ void evergreen_init_color_surface(struct r600_context *rctx, switch (rtex->surface.level[level].mode) { case RADEON_SURF_MODE_LINEAR_ALIGNED: color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED); - tile_type = 1; + non_disp_tiling = 1; break; case RADEON_SURF_MODE_1D: color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_1D_TILED_THIN1); - tile_type = rtex->tile_type; + non_disp_tiling = rtex->non_disp_tiling; break; case RADEON_SURF_MODE_2D: color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_2D_TILED_THIN1); - tile_type = rtex->tile_type; + non_disp_tiling = rtex->non_disp_tiling; break; case RADEON_SURF_MODE_LINEAR: default: color_info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_GENERAL); - tile_type = 1; + non_disp_tiling = 1; break; } tile_split = rtex->surface.tile_split; @@ -1308,7 +1316,7 @@ void evergreen_init_color_surface(struct r600_context *rctx, /* 128 bit formats require tile type = 1 */ if (rscreen->chip_class == CAYMAN) { if (util_format_get_blocksize(surf->base.format) >= 16) - tile_type = 1; + non_disp_tiling = 1; } nbanks = eg_num_banks(rscreen->tiling_info.num_banks); desc = util_format_description(surf->base.format); @@ -1323,7 +1331,7 @@ void evergreen_init_color_surface(struct r600_context *rctx, S_028C74_BANK_WIDTH(bankw) | S_028C74_BANK_HEIGHT(bankh) | S_028C74_MACRO_TILE_ASPECT(macro_aspect) | - S_028C74_NON_DISP_TILING_ORDER(tile_type) | + S_028C74_NON_DISP_TILING_ORDER(non_disp_tiling) | S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); if (rctx->chip_class == CAYMAN && rtex->resource.b.b.nr_samples > 1) { @@ -1515,26 +1523,15 @@ static void evergreen_init_depth_surface(struct r600_context *rctx, } if (rtex->surface.flags & RADEON_SURF_SBUFFER) { - uint64_t stencil_offset = rtex->surface.stencil_offset; - unsigned i, stile_split = rtex->surface.stencil_tile_split; + uint64_t stencil_offset; + unsigned stile_split = rtex->surface.stencil_tile_split; stile_split = eg_tile_split(stile_split); + + stencil_offset = rtex->surface.stencil_level[level].offset; stencil_offset += r600_resource_va(screen, surf->base.texture); - stencil_offset += rtex->surface.level[level].offset / 4; - stencil_offset >>= 8; - - /* We're guessing the stencil offset from the depth offset. - * Make sure each mipmap level has a unique offset. */ - for (i = 1; i <= level; i++) { - /* If two levels have the same address, add 256 - * to the offset of the smaller level. */ - if ((rtex->surface.level[i-1].offset / 4) >> 8 == - (rtex->surface.level[i].offset / 4) >> 8) { - stencil_offset++; - } - } - surf->db_stencil_base = stencil_offset; + surf->db_stencil_base = stencil_offset >> 8; surf->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8) | S_028044_TILE_SPLIT(stile_split); } else { @@ -2108,6 +2105,10 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_ S_028000_STENCIL_COPY_ENABLE(a->copy_stencil) | S_028000_COPY_CENTROID(1) | S_028000_COPY_SAMPLE(a->copy_sample); + } else if (a->flush_depthstencil_in_place) { + db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(1) | + S_028000_STENCIL_COMPRESS_DISABLE(1); + db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1); } r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 05206912374..c91b2d820d2 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -1109,9 +1109,13 @@ #define C_030018_INTERLACED 0xFFFFFFBF #define S_030018_TILE_SPLIT(x) (((x) & 0x7) << 29) #define R_03001C_SQ_TEX_RESOURCE_WORD7_0 0x03001C +#define S_03001C_DATA_FORMAT(x) (((x) & 0x3F) << 0) +#define G_03001C_DATA_FORMAT(x) (((x) >> 0) & 0x3F) +#define C_03001C_DATA_FORMAT 0xFFFFFFC0 #define S_03001C_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 6) #define S_03001C_BANK_WIDTH(x) (((x) & 0x3) << 8) #define S_03001C_BANK_HEIGHT(x) (((x) & 0x3) << 10) +#define S_03001C_DEPTH_SAMPLE_ORDER(x) (((x) & 0x1) << 15) #define S_03001C_NUM_BANKS(x) (((x) & 0x3) << 16) #define S_03001C_TYPE(x) (((x) & 0x3) << 30) #define G_03001C_TYPE(x) (((x) >> 30) & 0x3) @@ -1120,9 +1124,6 @@ #define V_03001C_SQ_TEX_VTX_INVALID_BUFFER 0x00000001 #define V_03001C_SQ_TEX_VTX_VALID_TEXTURE 0x00000002 #define V_03001C_SQ_TEX_VTX_VALID_BUFFER 0x00000003 -#define S_03001C_DATA_FORMAT(x) (((x) & 0x3F) << 0) -#define G_03001C_DATA_FORMAT(x) (((x) >> 0) & 0x3F) -#define C_03001C_DATA_FORMAT 0xFFFFFFC0 #define R_030008_SQ_VTX_CONSTANT_WORD2_0 0x030008 #define S_030008_BASE_ADDRESS_HI(x) (((x) & 0xFF) << 0) @@ -1644,6 +1645,9 @@ #define S_02800C_IGNORE_SC_ZRANGE(x) (((x) & 0x1) << 17) #define G_02800C_IGNORE_SC_ZRANGE(x) (((x) >> 17) & 0x1) #define C_02800C_IGNORE_SC_ZRANGE 0xFFFDFFFF +#define S_02800C_DISABLE_PIXEL_RATE_TILES(x) (((x) & 0x1) << 26) +#define G_02800C_DISABLE_PIXEL_RATE_TILES(x) (((x) >> 26) & 0x1) +#define C_02800C_DISABLE_PIXEL_RATE_TILES 0xFFFDFFFF #define R_028010_DB_RENDER_OVERRIDE2 0x00028010 #define R_028014_DB_HTILE_DATA_BASE 0x00028014 #define R_028028_DB_STENCIL_CLEAR 0x00028028 diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 085108d3709..a2ed17723f1 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -159,7 +159,6 @@ void r600_blit_decompress_depth(struct pipe_context *ctx, rctx->db_misc_state.copy_sample = first_sample; rctx->db_misc_state.atom.dirty = true; - for (level = first_level; level <= last_level; level++) { if (!staging && !(texture->dirty_level_mask & (1 << level))) continue; @@ -218,6 +217,58 @@ void r600_blit_decompress_depth(struct pipe_context *ctx, rctx->db_misc_state.atom.dirty = true; } +static void r600_blit_decompress_depth_in_place(struct r600_context *rctx, + struct r600_texture *texture, + unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer) +{ + struct pipe_surface *zsurf, surf_tmpl = {{0}}; + unsigned layer, max_layer, checked_last_layer, level; + + /* Enable decompression in DB_RENDER_CONTROL */ + rctx->db_misc_state.flush_depthstencil_in_place = true; + rctx->db_misc_state.atom.dirty = true; + + surf_tmpl.format = texture->resource.b.b.format; + surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; + + for (level = first_level; level <= last_level; level++) { + if (!(texture->dirty_level_mask & (1 << level))) + continue; + + surf_tmpl.u.tex.level = level; + + /* The smaller the mipmap level, the less layers there are + * as far as 3D textures are concerned. */ + max_layer = u_max_layer(&texture->resource.b.b, level); + checked_last_layer = last_layer < max_layer ? last_layer : max_layer; + + for (layer = first_layer; layer <= checked_last_layer; layer++) { + surf_tmpl.u.tex.first_layer = layer; + surf_tmpl.u.tex.last_layer = layer; + + zsurf = rctx->context.create_surface(&rctx->context, &texture->resource.b.b, &surf_tmpl); + + r600_blitter_begin(&rctx->context, R600_DECOMPRESS); + util_blitter_custom_depth_stencil(rctx->blitter, zsurf, NULL, ~0, + rctx->custom_dsa_flush, 1.0f); + r600_blitter_end(&rctx->context); + + pipe_surface_reference(&zsurf, NULL); + } + + /* The texture will always be dirty if some layers or samples aren't flushed. + * I don't think this case occurs often though. */ + if (first_layer == 0 && last_layer == max_layer) { + texture->dirty_level_mask &= ~(1 << level); + } + } + + /* Disable decompression in DB_RENDER_CONTROL */ + rctx->db_misc_state.flush_depthstencil_in_place = false; + rctx->db_misc_state.atom.dirty = true; +} + void r600_decompress_depth_textures(struct r600_context *rctx, struct r600_samplerview_state *textures) { @@ -236,10 +287,17 @@ void r600_decompress_depth_textures(struct r600_context *rctx, tex = (struct r600_texture *)view->texture; assert(tex->is_depth && !tex->is_flushing_texture); - r600_blit_decompress_depth(&rctx->context, tex, NULL, - view->u.tex.first_level, view->u.tex.last_level, - 0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level), - 0, u_max_sample(&tex->resource.b.b)); + if (rctx->chip_class >= EVERGREEN || + r600_can_read_depth(tex)) { + r600_blit_decompress_depth_in_place(rctx, tex, + view->u.tex.first_level, view->u.tex.last_level, + 0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level)); + } else { + r600_blit_decompress_depth(&rctx->context, tex, NULL, + view->u.tex.first_level, view->u.tex.last_level, + 0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level), + 0, u_max_sample(&tex->resource.b.b)); + } } } @@ -299,7 +357,7 @@ static void r600_blit_decompress_color(struct pipe_context *ctx, pipe_surface_reference(&cbsurf, NULL); } - /* The texture will always be dirty if some layers or samples aren't flushed. + /* The texture will always be dirty if some layers aren't flushed. * I don't think this case occurs often though. */ if (first_layer == 0 && last_layer == max_layer) { rtex->dirty_level_mask &= ~(1 << level); @@ -340,16 +398,24 @@ static bool r600_decompress_subresource(struct pipe_context *ctx, unsigned level, unsigned first_layer, unsigned last_layer) { + struct r600_context *rctx = (struct r600_context *)ctx; struct r600_texture *rtex = (struct r600_texture*)tex; if (rtex->is_depth && !rtex->is_flushing_texture) { - if (!r600_init_flushed_depth_texture(ctx, tex, NULL)) - return false; /* error */ + if (rctx->chip_class >= EVERGREEN || + r600_can_read_depth(rtex)) { + r600_blit_decompress_depth_in_place(rctx, rtex, + level, level, + first_layer, last_layer); + } else { + if (!r600_init_flushed_depth_texture(ctx, tex, NULL)) + return false; /* error */ - r600_blit_decompress_depth(ctx, rtex, NULL, - level, level, - first_layer, last_layer, - 0, u_max_sample(tex)); + r600_blit_decompress_depth(ctx, rtex, NULL, + level, level, + first_layer, last_layer, + 0, u_max_sample(tex)); + } } else if (rtex->fmask_size && rtex->cmask_size) { r600_blit_decompress_color(ctx, rtex, level, level, first_layer, last_layer); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 342ab87052d..7f1902564cc 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -71,6 +71,7 @@ struct r600_db_misc_state { struct r600_atom atom; bool occlusion_query_enabled; bool flush_depthstencil_through_cb; + bool flush_depthstencil_in_place; bool copy_depth, copy_stencil; unsigned copy_sample; unsigned log_samples; diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 7ebf59e23a5..007d5e08d35 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -48,7 +48,7 @@ struct r600_texture { unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS]; unsigned pitch_override; unsigned size; - unsigned tile_type; + bool non_disp_tiling; bool is_depth; bool is_rat; unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */ @@ -115,6 +115,14 @@ struct r600_surface { unsigned pa_su_poly_offset_db_fmt_cntl; }; +/* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */ +static INLINE bool r600_can_read_depth(struct r600_texture *rtex) +{ + return rtex->resource.b.b.nr_samples <= 1 && + (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM || + rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT); +} + void r600_resource_destroy(struct pipe_screen *screen, struct pipe_resource *res); void r600_init_screen_resource_functions(struct pipe_screen *screen); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 5c52f3d518e..a7b602ddf7a 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -985,7 +985,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx, struct r600_texture *tmp = (struct r600_texture*)texture; unsigned format, endian; uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4], array_mode = 0, tile_type = 0; + unsigned char swizzle[4], array_mode = 0; unsigned width, height, depth, offset_level, last_level; if (view == NULL) @@ -1013,7 +1013,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx, return NULL; } - if (tmp->is_depth && !tmp->is_flushing_texture) { + if (tmp->is_depth && !tmp->is_flushing_texture && !r600_can_read_depth(tmp)) { if (!r600_init_flushed_depth_texture(ctx, texture, NULL)) { FREE(view); return NULL; @@ -1029,7 +1029,6 @@ r600_create_sampler_view_custom(struct pipe_context *ctx, height = height_first_level; depth = tmp->surface.level[offset_level].npix_z; pitch = tmp->surface.level[offset_level].nblk_x * util_format_get_blockwidth(state->format); - tile_type = tmp->tile_type; if (texture->target == PIPE_TEXTURE_1D_ARRAY) { height = 1; @@ -1056,7 +1055,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx, view->tex_resource = &tmp->resource; view->tex_resource_words[0] = (S_038000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) | S_038000_TILE_MODE(array_mode) | - S_038000_TILE_TYPE(tile_type) | + S_038000_TILE_TYPE(tmp->non_disp_tiling) | S_038000_PITCH((pitch / 8) - 1) | S_038000_TEX_WIDTH(width - 1)); view->tex_resource_words[1] = (S_038004_TEX_HEIGHT(height - 1) | @@ -1178,7 +1177,7 @@ static void r600_init_color_surface(struct r600_context *rctx, int i; bool blend_bypass = 0, blend_clamp = 1; - if (rtex->is_depth && !rtex->is_flushing_texture) { + if (rtex->is_depth && !rtex->is_flushing_texture && !r600_can_read_depth(rtex)) { r600_init_flushed_depth_texture(&rctx->context, surf->base.texture, NULL); rtex = rtex->flushed_depth_texture; assert(rtex); @@ -1849,6 +1848,10 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom S_028D0C_STENCIL_COPY_ENABLE(a->copy_stencil) | S_028D0C_COPY_CENTROID(1) | S_028D0C_COPY_SAMPLE(a->copy_sample); + } else if (a->flush_depthstencil_in_place) { + db_render_control |= S_028D0C_DEPTH_COMPRESS_DISABLE(1) | + S_028D0C_STENCIL_COMPRESS_DISABLE(1); + db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); } r600_write_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 2df390d5051..75e7f870efb 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -151,7 +151,8 @@ static int r600_init_surface(struct r600_screen *rscreen, surface->flags |= RADEON_SURF_ZBUFFER; if (is_stencil) { - surface->flags |= RADEON_SURF_SBUFFER; + surface->flags |= RADEON_SURF_SBUFFER | + RADEON_SURF_HAS_SBUFFER_MIPTREE; } } return 0; @@ -179,7 +180,8 @@ static int r600_setup_surface(struct pipe_screen *screen, rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override; rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y; if (rtex->surface.flags & RADEON_SURF_SBUFFER) { - rtex->surface.stencil_offset = rtex->surface.level[0].slice_size; + rtex->surface.stencil_offset = + rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size; } } for (i = 0; i <= ptex->last_level; i++) { @@ -365,6 +367,8 @@ static void r600_texture_allocate_cmask(struct r600_screen *rscreen, #endif } +DEBUG_GET_ONCE_BOOL_OPTION(print_texdepth, "R600_PRINT_TEXDEPTH", FALSE); + static struct r600_texture * r600_texture_create_object(struct pipe_screen *screen, const struct pipe_resource *base, @@ -411,6 +415,9 @@ r600_texture_create_object(struct pipe_screen *screen, return NULL; } + /* Tiled depth textures utilize the non-displayable tile order. */ + rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D; + /* Now create the backing buffer. */ if (!buf && alloc_bo) { unsigned base_align = rtex->surface.bo_alignment; @@ -433,6 +440,52 @@ r600_texture_create_object(struct pipe_screen *screen, memset(ptr + rtex->cmask_offset, 0xCC, rtex->cmask_size); rscreen->ws->buffer_unmap(resource->cs_buf); } + + if (debug_get_option_print_texdepth() && rtex->is_depth && rtex->non_disp_tiling) { + printf("Texture: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, " + "blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, " + "bpe=%u, nsamples=%u, flags=%u\n", + rtex->surface.npix_x, rtex->surface.npix_y, + rtex->surface.npix_z, rtex->surface.blk_w, + rtex->surface.blk_h, rtex->surface.blk_d, + rtex->surface.array_size, rtex->surface.last_level, + rtex->surface.bpe, rtex->surface.nsamples, + rtex->surface.flags); + if (rtex->surface.flags & RADEON_SURF_ZBUFFER) { + for (int i = 0; i <= rtex->surface.last_level; i++) { + printf(" Z %i: offset=%llu, slice_size=%llu, npix_x=%u, " + "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, " + "nblk_z=%u, pitch_bytes=%u, mode=%u\n", + i, rtex->surface.level[i].offset, + rtex->surface.level[i].slice_size, + rtex->surface.level[i].npix_x, + rtex->surface.level[i].npix_y, + rtex->surface.level[i].npix_z, + rtex->surface.level[i].nblk_x, + rtex->surface.level[i].nblk_y, + rtex->surface.level[i].nblk_z, + rtex->surface.level[i].pitch_bytes, + rtex->surface.level[i].mode); + } + } + if (rtex->surface.flags & RADEON_SURF_SBUFFER) { + for (int i = 0; i <= rtex->surface.last_level; i++) { + printf(" S %i: offset=%llu, slice_size=%llu, npix_x=%u, " + "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, " + "nblk_z=%u, pitch_bytes=%u, mode=%u\n", + i, rtex->surface.stencil_level[i].offset, + rtex->surface.stencil_level[i].slice_size, + rtex->surface.stencil_level[i].npix_x, + rtex->surface.stencil_level[i].npix_y, + rtex->surface.stencil_level[i].npix_z, + rtex->surface.stencil_level[i].nblk_x, + rtex->surface.stencil_level[i].nblk_y, + rtex->surface.stencil_level[i].nblk_z, + rtex->surface.stencil_level[i].pitch_bytes, + rtex->surface.stencil_level[i].mode); + } + } + } return rtex; } @@ -591,6 +644,7 @@ bool r600_init_flushed_depth_texture(struct pipe_context *ctx, } (*flushed_depth_texture)->is_flushing_texture = TRUE; + (*flushed_depth_texture)->non_disp_tiling = false; return true; } @@ -845,7 +899,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, /* Colorspace (return non-RGB formats directly). */ switch (desc->colorspace) { - /* Depth stencil formats */ + /* Depth stencil formats */ case UTIL_FORMAT_COLORSPACE_ZS: switch (format) { case PIPE_FORMAT_Z16_UNORM: -- 2.30.2