unsigned height;
unsigned xalign;
unsigned yalign;
+ unsigned alignment;
};
struct r600_texture {
uint64_t size;
unsigned num_level0_transfers;
+ enum pipe_format db_render_format;
bool is_depth;
bool db_compatible;
bool can_sample_z;
/* Depth buffer compression and fast clear. */
struct r600_htile_info htile;
struct r600_resource *htile_buffer;
+ bool tc_compatible_htile;
bool depth_cleared; /* if it was cleared at least once */
float depth_clear_value;
bool stencil_cleared; /* if it was cleared at least once */
struct radeon_surf *surface,
const struct pipe_resource *ptex,
unsigned array_mode,
- bool is_flushed_depth)
+ bool is_flushed_depth,
+ bool tc_compatible_htile)
{
const struct util_format_description *desc =
util_format_description(ptex->format);
if (!is_flushed_depth && is_depth) {
surface->flags |= RADEON_SURF_ZBUFFER;
+ if (tc_compatible_htile &&
+ array_mode == RADEON_SURF_MODE_2D) {
+ /* TC-compatible HTILE only supports Z32_FLOAT.
+ * Promote Z16 to Z32. DB->CB copies will convert
+ * the format for transfers.
+ */
+ surface->bpe = 4;
+ surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
+ }
+
if (is_stencil) {
surface->flags |= RADEON_SURF_SBUFFER |
RADEON_SURF_HAS_SBUFFER_MIPTREE;
}
}
+
if (rscreen->chip_class >= SI) {
surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
}
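
Z32_FLOAT is 4 bytes per pixel, so forcing bpe = 4 above only changes the layout of Z16 surfaces; the Z24 and Z32 variants already use a 4-byte depth plane. The memory cost of the promotion is easy to bound (a sketch, not part of the patch; tiling and alignment padding ignored):

/* Back-of-the-envelope cost of the Z16 -> Z32 promotion for a 1920x1080
 * depth buffer:
 *   Z16_UNORM : 1920 * 1080 * 2 bytes = 4,147,200 bytes (~4.0 MiB)
 *   Z32_FLOAT : 1920 * 1080 * 4 bytes = 8,294,400 bytes (~7.9 MiB)
 * TC-compatible HTILE trades this extra depth-buffer memory for sampling
 * the compressed surface without an in-place decompress blit.
 */
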
rtex->htile.height = height;
rtex->htile.xalign = cl_width * 8;
rtex->htile.yalign = cl_height * 8;
+ rtex->htile.alignment = base_align;
return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
align(slice_bytes, base_align);
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
- unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);
+ uint64_t htile_size, alignment;
+ uint32_t clear_value;
+
+ if (rtex->tc_compatible_htile) {
+ htile_size = rtex->surface.htile_size;
+ alignment = rtex->surface.htile_alignment;
+ clear_value = 0x0000030F;
+ } else {
+ htile_size = r600_texture_get_htile_size(rscreen, rtex);
+ alignment = rtex->htile.alignment;
+ clear_value = 0;
+ }
if (!htile_size)
return;
rtex->htile_buffer = (struct r600_resource*)
- pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
- PIPE_USAGE_DEFAULT, htile_size);
+ r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
+ PIPE_USAGE_DEFAULT,
+ htile_size, alignment);
if (rtex->htile_buffer == NULL) {
/* this is not a fatal error as we can still keep rendering
* without htile buffer */
R600_ERR("Failed to create buffer object for htile buffer.\n");
} else {
- r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
- htile_size, 0, R600_COHERENCY_NONE);
+ r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
+ 0, htile_size, clear_value,
+ R600_COHERENCY_NONE);
}
}
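
The two branches above differ only in where the HTILE size and alignment come from (addrlib vs. the driver's r600_texture_get_htile_size) and in the initial metadata pattern, which for the TC-compatible case presumably encodes the fully expanded state so the texture unit sees valid data before the first fast clear. A hypothetical debug helper, not part of the patch, using only fields this patch adds:

/* For buffers, width0 is the size in bytes (same convention as the
 * HTile fprintf below). */
static void si_print_htile_path(struct r600_texture *rtex)
{
	if (!rtex->htile_buffer)
		return;
	fprintf(stderr, "HTILE: %u bytes, %s\n",
		rtex->htile_buffer->b.b.width0,
		rtex->tc_compatible_htile ? "TC-compatible (addrlib-sized)"
		                          : "legacy (driver-sized)");
}
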
if (rtex->htile_buffer)
fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, "
- "xalign=%u, yalign=%u\n",
+ "xalign=%u, yalign=%u, TC_compatible = %u\n",
rtex->htile_buffer->b.b.width0,
rtex->htile_buffer->buf->alignment, rtex->htile.pitch,
- rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign);
+ rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign,
+ rtex->tc_compatible_htile);
if (rtex->dcc_offset) {
fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n",
return NULL;
}
+ rtex->tc_compatible_htile = rtex->surface.htile_size != 0;
+ assert(!!(rtex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) ==
+ rtex->tc_compatible_htile);
+
+ /* TC-compatible HTILE only supports Z32_FLOAT. */
+ if (rtex->tc_compatible_htile)
+ rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
+ else
+ rtex->db_render_format = base->format;
+
/* Tiled depth textures utilize the non-displayable tile order.
* This must be done after r600_setup_surface.
* Applies to R600-Cayman. */
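
On the db_render_format split added above: db_render_format is what the DB registers get programmed with (see si_translate_dbformat further down), while base->format stays what the state tracker and transfers see. For the common Gallium depth formats the mapping is roughly:

/*   base->format                     db_render_format (TC-compat HTILE on)
 *   PIPE_FORMAT_Z16_UNORM         -> PIPE_FORMAT_Z32_FLOAT
 *   PIPE_FORMAT_Z24_UNORM_S8_UINT -> PIPE_FORMAT_Z32_FLOAT (stencil stays a
 *                                    separate surface)
 *   PIPE_FORMAT_Z32_FLOAT         -> PIPE_FORMAT_Z32_FLOAT (unchanged)
 * With TC-compatible HTILE off, db_render_format == base->format.
 */
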
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_surf surface = {0};
+ bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
+ bool tc_compatible_htile =
+ rscreen->chip_class >= VI &&
+ (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
+ !(rscreen->debug_flags & DBG_NO_HYPERZ) &&
+ !is_flushed_depth &&
+ templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
+ util_format_is_depth_or_stencil(templ->format);
+
int r;
r = r600_init_surface(rscreen, &surface, templ,
r600_choose_tiling(rscreen, templ),
- templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
+ is_flushed_depth, tc_compatible_htile);
if (r) {
return NULL;
}
else
array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
- r = r600_init_surface(rscreen, &surface, templ, array_mode, false);
+ r = r600_init_surface(rscreen, &surface, templ, array_mode,
+ false, false);
if (r) {
return NULL;
}
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
#define RADEON_SURF_FMASK (1 << 21)
#define RADEON_SURF_DISABLE_DCC (1 << 22)
+#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
uint64_t dcc_size;
uint64_t dcc_alignment;
+ /* TC-compatible HTILE only. */
+ uint64_t htile_size;
+ uint64_t htile_alignment;
};
struct radeon_bo_list_item {
}
}
+ assert(!tex->tc_compatible_htile || levels_z == 0);
+
/* We may have to allocate the flushed texture here when called from
* si_decompress_subresource.
*/
zsbuf->u.tex.level == 0 &&
zsbuf->u.tex.first_layer == 0 &&
zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
- if (buffers & PIPE_CLEAR_DEPTH) {
+ /* TC-compatible HTILE only supports depth clears to 0 or 1. */
+ if (buffers & PIPE_CLEAR_DEPTH &&
+ (!zstex->tc_compatible_htile ||
+ depth == 0 || depth == 1)) {
/* Need to disable EXPCLEAR temporarily if clearing
* to a new value. */
if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
- if (buffers & PIPE_CLEAR_STENCIL) {
+ /* TC-compatible HTILE only supports stencil clears to 0. */
+ if (buffers & PIPE_CLEAR_STENCIL &&
+ (!zstex->tc_compatible_htile || stencil == 0)) {
stencil &= 0xff;
/* Need to disable EXPCLEAR temporarily if clearing
state[7] = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
tex->dcc_offset +
base_level_info->dcc_offset) >> 8;
+ } else if (tex->tc_compatible_htile) {
+ state[6] |= S_008F28_COMPRESSION_EN(1);
+ state[7] = tex->htile_buffer->gpu_address >> 8;
}
}
if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
struct r600_texture *rtex =
(struct r600_texture*)views[i]->texture;
+ struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
- if (rtex->db_compatible) {
+ if (rtex->db_compatible &&
+ (!rtex->tc_compatible_htile || rview->is_stencil_sampler)) {
samplers->depth_texture_mask |= 1u << slot;
} else {
samplers->depth_texture_mask &= ~(1u << slot);
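
depth_texture_mask selects which bound views get a depth decompress pass before draws. The new condition reads as the following decision table (a sketch using only the fields tested above):

/*   db_compatible  tc_compatible_htile  is_stencil_sampler   decompress?
 *        no                -                    -            no
 *        yes               no                   -            yes
 *        yes               yes                  no           no  (TC reads HTILE directly)
 *        yes               yes                  yes          yes (stencil is not TC-readable)
 */
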
/* Pack depth comparison value */
if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
+ LLVMValueRef z;
+
if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
- address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
+ z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
} else {
assert(ref_pos >= 0);
- address[count++] = coords[ref_pos];
+ z = coords[ref_pos];
}
+
+ /* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
+ * so the depth comparison value isn't clamped for Z16 and
+ * Z24 anymore. Do it manually here.
+ *
+ * It's unnecessary if the original texture format was
+ * Z32_FLOAT, but we don't know that here.
+ */
+ if (ctx->screen->b.chip_class == VI)
+ z = radeon_llvm_saturate(bld_base, z);
+
+ address[count++] = z;
}
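
The clamp matters because UNORM depth hardware clamps the comparison reference to [0, 1] while Z32_FLOAT does not. In scalar terms, radeon_llvm_saturate amounts to the following (CLAMP is the usual Mesa macro; the snippet is only an illustration):

/* A reference of e.g. 1.5 that UNORM hardware would have clamped must now be
 * clamped in the shader; otherwise a LEQUAL compare against stored depth 1.0
 * would wrongly fail on the promoted Z32_FLOAT surface. */
float ref = CLAMP(z, 0.0f, 1.0f);   /* 1.5 -> 1.0, -0.2 -> 0.0 */
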
/* Pack user derivatives */
if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
return;
+ /* Use the user format, not db_render_format, so that the polygon
+ * offset behaves as expected by applications.
+ */
switch (sctx->framebuffer.state.zsbuf->texture->format) {
case PIPE_FORMAT_Z16_UNORM:
si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
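
Spelling the comment out: the pre-baked poly_offset states scale the "units" term of glPolygonOffset by the depth-buffer precision, so the choice has to follow the format the application asked for. Roughly (a sketch; only the Z16 case is shown in this hunk):

/*   PIPE_FORMAT_Z16_UNORM   -> offset unit ~ 2^-16 (pm4_poly_offset[0], above)
 *   Z24 UNORM variants      -> offset unit ~ 2^-24
 *   PIPE_FORMAT_Z32_FLOAT*  -> float-based offset
 * Keying the switch off db_render_format, which is always Z32_FLOAT with
 * TC-compatible HTILE, would silently change polygon-offset behaviour for
 * applications using Z16/Z24 depth buffers.
 */
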
uint64_t z_offs, s_offs;
uint32_t db_htile_data_base, db_htile_surface;
- format = si_translate_dbformat(rtex->resource.b.b.format);
+ format = si_translate_dbformat(rtex->db_render_format);
if (format == V_028040_Z_INVALID) {
R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
z_offs += rtex->surface.level[level].offset;
s_offs += rtex->surface.stencil_level[level].offset;
- db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
+ db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);
z_info = S_028040_FORMAT(format);
if (rtex->resource.b.b.nr_samples > 1) {
*/
if (rtex->resource.b.b.nr_samples <= 1)
s_info |= S_028044_ALLOW_EXPCLEAR(1);
- } else
- /* Use all of the htile_buffer for depth if there's no stencil. */
+ } else if (!rtex->tc_compatible_htile) {
+ /* Use all of the htile_buffer for depth if there's no stencil.
+ * This must not be set when TC-compatible HTILE is enabled
+ * due to a hw bug.
+ */
s_info |= S_028044_TILE_STENCIL_DISABLE(1);
+ }
uint64_t va = rtex->htile_buffer->gpu_address;
db_htile_data_base = va >> 8;
db_htile_surface = S_028ABC_FULL_CACHE(1);
+
+ if (rtex->tc_compatible_htile) {
+ db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
+
+ switch (rtex->resource.b.b.nr_samples) {
+ case 0:
+ case 1:
+ z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
+ break;
+ case 2:
+ case 4:
+ z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
+ break;
+ case 8:
+ z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
+ break;
+ default:
+ assert(0);
+ }
+ }
} else {
db_htile_data_base = 0;
db_htile_surface = 0;
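
On the DECOMPRESS_ON_N_ZPLANES switch above: the field appears to cap how many Z planes a compressed tile may accumulate before DB expands it in place, which is what keeps the compressed data consumable by the texture unit. The sample-count mapping, as a hypothetical helper with the same values:

static unsigned si_tc_compat_max_zplanes(unsigned nr_samples)
{
	/* Same values as the switch above; other sample counts assert there. */
	if (nr_samples <= 1)
		return 5;
	if (nr_samples <= 4)
		return 3;
	return 2;
}
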
if (state->zsbuf) {
surf = (struct r600_surface*)state->zsbuf;
+ rtex = (struct r600_texture*)surf->base.texture;
if (!surf->depth_initialized) {
si_init_depth_surface(sctx, surf);
surflevel = tmp->surface.level;
if (tmp->db_compatible) {
+ if (!view->is_stencil_sampler)
+ pipe_format = tmp->db_render_format;
+
switch (pipe_format) {
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
pipe_format = PIPE_FORMAT_Z32_FLOAT;
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
- rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+ if (!rtex->tc_compatible_htile)
+ rtex->dirty_level_mask |= 1 << surf->u.tex.level;
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
createFlags.value = 0;
createFlags.useTileIndex = 1;
createFlags.degradeBaseLevel = 1;
+ createFlags.useHtileSliceAlign = 1;
addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
addrCreateInput.chipFamily = ws->family;
ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
- ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut)
+ ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
+ ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
+ ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
{
struct radeon_surf_level *surf_level;
ADDR_E_RETURNCODE ret;
}
}
+ /* TC-compatible HTILE. */
+ if (!is_stencil &&
+ AddrSurfInfoIn->flags.depth &&
+ AddrSurfInfoIn->flags.tcCompatible &&
+ surf_level->mode == RADEON_SURF_MODE_2D &&
+ level == 0) {
+ AddrHtileIn->flags.tcCompatible = 1;
+ AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
+ AddrHtileIn->height = AddrSurfInfoOut->height;
+ AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
+ AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
+ AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
+ AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
+ AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
+ AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
+
+ ret = AddrComputeHtileInfo(ws->addrlib,
+ AddrHtileIn,
+ AddrHtileOut);
+
+ if (ret == ADDR_OK) {
+ surf->htile_size = AddrHtileOut->htileBytes;
+ surf->htile_alignment = AddrHtileOut->baseAlign;
+ }
+ }
+
return 0;
}
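
For scale: HTILE stores 4 bytes of metadata per 8x8 pixel tile, so the value addrlib returns is in the same ballpark as a naive estimate, before its pitch, slice and base-alignment padding:

/* 1920x1080 depth buffer:
 *   ceil(1920 / 8) * ceil(1080 / 8) * 4 = 240 * 135 * 4 = 129,600 bytes
 * AddrComputeHtileInfo pads this up to the hardware's alignment rules, which
 * is why htileBytes/baseAlign are taken from its output instead of recomputed
 * by hand.
 */
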
ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
+ ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
+ ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
ADDR_TILEINFO AddrTileInfoIn = {0};
ADDR_TILEINFO AddrTileInfoOut = {0};
int r;
AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
+ AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
+ AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
type = RADEON_SURF_GET(surf->flags, TYPE);
AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
- AddrSurfInfoIn.flags.degrade4Space = 1;
+ AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
+
+ /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
+ * requested, because TC-compatible HTILE requires 2D tiling.
+ */
+ AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible;
/* DCC notes:
* - If we add MSAA support, keep in mind that CB can't decompress 8bpp
surf->bo_size = 0;
surf->dcc_size = 0;
surf->dcc_alignment = 1;
+ surf->htile_size = 0;
+ surf->htile_alignment = 1;
/* Calculate texture layout information. */
for (level = 0; level <= surf->last_level; level++) {
r = compute_level(ws, surf, false, level, type, compressed,
- &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
+ &AddrSurfInfoIn, &AddrSurfInfoOut,
+ &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut);
if (r)
return r;
AddrSurfInfoIn.bpp = 8;
AddrSurfInfoIn.flags.depth = 0;
AddrSurfInfoIn.flags.stencil = 1;
+ AddrSurfInfoIn.flags.tcCompatible = 0;
/* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split;
for (level = 0; level <= surf->last_level; level++) {
r = compute_level(ws, surf, true, level, type, compressed,
- &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
+ &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut,
+ NULL, NULL);
if (r)
return r;
ws->info.num_tile_pipes);
}
+ /* Make sure HTILE covers the whole miptree, because the shader reads
+ * TC-compatible HTILE even for levels where it's disabled by DB.
+ */
+ if (surf->htile_size && surf->last_level)
+ surf->htile_size *= 2;
+
return 0;
}
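
The doubling is a conservative bound rather than an exact size; the series below shows why it always covers the whole miptree, with slack left for per-level alignment:

/*   htile(level 0) * (1 + 1/4 + 1/16 + ...) < htile(level 0) * 4/3
 *                                           < htile(level 0) * 2
 * Each mip level has at most a quarter of the previous level's area, so
 * doubling the level-0 HTILE size is always enough.
 */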