#include "si_pipe.h"
#include "si_query.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
#include "util/u_log.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "state_tracker/drm_driver.h"
#include "sid.h"
#include "amd/addrlib/inc/addrinterface.h"
+#include "drm-uapi/drm_fourcc.h"
static enum radeon_surf_mode
si_choose_tiling(struct si_screen *sscreen,
const struct pipe_resource *ptex,
enum radeon_surf_mode array_mode,
unsigned pitch_in_bytes_override,
- unsigned offset,
bool is_imported,
bool is_scanout,
bool is_flushed_depth,
util_format_description(ptex->format);
bool is_depth, is_stencil;
int r;
- unsigned i, bpe, flags = 0;
+ unsigned bpe, flags = 0;
is_depth = util_format_has_depth(desc);
is_stencil = util_format_has_stencil(desc);
if (!is_flushed_depth && is_depth) {
flags |= RADEON_SURF_ZBUFFER;
- if (tc_compatible_htile &&
- (sscreen->info.chip_class >= GFX9 ||
- array_mode == RADEON_SURF_MODE_2D)) {
+ if (sscreen->debug_flags & DBG(NO_HYPERZ)) {
+ flags |= RADEON_SURF_NO_HTILE;
+ } else if (tc_compatible_htile &&
+ (sscreen->info.chip_class >= GFX9 ||
+ array_mode == RADEON_SURF_MODE_2D)) {
/* TC-compatible HTILE only supports Z32_FLOAT.
* GFX9 also supports Z16_UNORM.
* On GFX8, promote Z16 to Z32. DB->CB copies will convert
ptex->nr_storage_samples >= 2)
flags |= RADEON_SURF_DISABLE_DCC;
+	/* Shared textures must always set up DCC.
+	 * If it's not present, it will be disabled by
+	 * si_read_tex_bo_metadata later.
+	 */
+ if (!is_imported && (sscreen->debug_flags & DBG(NO_DCC)))
+ flags |= RADEON_SURF_DISABLE_DCC;
+
if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) {
/* This should catch bugs in gallium users setting incorrect flags. */
assert(ptex->nr_samples <= 1 &&
flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
if (!(ptex->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING))
flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
+ if (sscreen->debug_flags & DBG(NO_FMASK))
+ flags |= RADEON_SURF_NO_FMASK;
if (sscreen->info.chip_class >= GFX10 &&
(ptex->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING)) {
if (sscreen->info.chip_class >= GFX9) {
if (pitch) {
surface->u.gfx9.surf_pitch = pitch;
+ if (ptex->last_level == 0)
+ surface->u.gfx9.surf.epitch = pitch - 1;
surface->u.gfx9.surf_slice_size =
(uint64_t)pitch * surface->u.gfx9.surf_height * bpe;
}
- surface->u.gfx9.surf_offset = offset;
} else {
if (pitch) {
surface->u.legacy.level[0].nblk_x = pitch;
surface->u.legacy.level[0].slice_size_dw =
((uint64_t)pitch * surface->u.legacy.level[0].nblk_y * bpe) / 4;
}
- if (offset) {
- for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
- surface->u.legacy.level[i].offset += offset;
- }
}
return 0;
}
struct pipe_context *ctx = &sctx->b;
if (ctx == sscreen->aux_context)
- mtx_lock(&sscreen->aux_context_lock);
+ simple_mtx_lock(&sscreen->aux_context_lock);
unsigned n = sctx->num_decompress_calls;
ctx->flush_resource(ctx, &tex->buffer.b.b);
ctx->flush(ctx, NULL, 0);
if (ctx == sscreen->aux_context)
- mtx_unlock(&sscreen->aux_context_lock);
+ simple_mtx_unlock(&sscreen->aux_context_lock);
}
void si_texture_discard_cmask(struct si_screen *sscreen,
static bool si_can_disable_dcc(struct si_texture *tex)
{
/* We can't disable DCC if it can be written by another process. */
- return tex->dcc_offset &&
+ return tex->surface.dcc_offset &&
(!tex->buffer.b.is_shared ||
!(tex->buffer.external_usage & PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE));
}
+/* Reset every DCC-related offset in the surface (main, displayable, and
+ * retile-map), marking the texture as having no DCC.
+ */
+static void si_texture_zero_dcc_fields(struct si_texture *tex)
+{
+	tex->surface.dcc_offset = 0;
+	tex->surface.display_dcc_offset = 0;
+	tex->surface.dcc_retile_map_offset = 0;
+}
+
static bool si_texture_discard_dcc(struct si_screen *sscreen,
struct si_texture *tex)
{
if (!si_can_disable_dcc(tex)) {
- assert(tex->display_dcc_offset == 0);
+ assert(tex->surface.display_dcc_offset == 0);
return false;
}
assert(tex->dcc_separate_buffer == NULL);
/* Disable DCC. */
- tex->dcc_offset = 0;
- tex->display_dcc_offset = 0;
- tex->dcc_retile_map_offset = 0;
+ si_texture_zero_dcc_fields(tex);
/* Notify all contexts about the change. */
p_atomic_inc(&sscreen->dirty_tex_counter);
return false;
if (&sctx->b == sscreen->aux_context)
- mtx_lock(&sscreen->aux_context_lock);
+ simple_mtx_lock(&sscreen->aux_context_lock);
/* Decompress DCC. */
si_decompress_dcc(sctx, tex);
sctx->b.flush(&sctx->b, NULL, 0);
if (&sctx->b == sscreen->aux_context)
- mtx_unlock(&sscreen->aux_context_lock);
+ simple_mtx_unlock(&sscreen->aux_context_lock);
return si_texture_discard_dcc(sscreen, tex);
}
templ.bind |= new_bind_flag;
- if (tex->buffer.b.is_shared)
+ if (tex->buffer.b.is_shared || tex->num_planes > 1)
return;
if (new_bind_flag == PIPE_BIND_LINEAR) {
tex->buffer.flags = new_tex->buffer.flags;
tex->surface = new_tex->surface;
- tex->size = new_tex->size;
si_texture_reference(&tex->flushed_depth_texture,
new_tex->flushed_depth_texture);
- tex->fmask_offset = new_tex->fmask_offset;
- tex->cmask_offset = new_tex->cmask_offset;
+ tex->surface.fmask_offset = new_tex->surface.fmask_offset;
+ tex->surface.cmask_offset = new_tex->surface.cmask_offset;
tex->cmask_base_address_reg = new_tex->cmask_base_address_reg;
if (tex->cmask_buffer == &tex->buffer)
else
si_resource_reference(&tex->cmask_buffer, new_tex->cmask_buffer);
- tex->dcc_offset = new_tex->dcc_offset;
+ tex->surface.dcc_offset = new_tex->surface.dcc_offset;
tex->cb_color_info = new_tex->cb_color_info;
memcpy(tex->color_clear_value, new_tex->color_clear_value,
sizeof(tex->color_clear_value));
tex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;
- tex->htile_offset = new_tex->htile_offset;
+ tex->surface.htile_offset = new_tex->surface.htile_offset;
tex->depth_clear_value = new_tex->depth_clear_value;
tex->dirty_level_mask = new_tex->dirty_level_mask;
tex->stencil_dirty_level_mask = new_tex->stencil_dirty_level_mask;
new_tex->last_dcc_separate_buffer);
if (new_bind_flag == PIPE_BIND_LINEAR) {
- assert(!tex->htile_offset);
+ assert(!tex->surface.htile_offset);
assert(!tex->cmask_buffer);
assert(!tex->surface.fmask_size);
- assert(!tex->dcc_offset);
+ assert(!tex->surface.dcc_offset);
assert(!tex->is_depth);
}
if (sscreen->info.chip_class >= GFX9) {
md.u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
- if (tex->dcc_offset && !tex->dcc_separate_buffer) {
+ if (tex->surface.dcc_offset && !tex->dcc_separate_buffer) {
uint64_t dcc_offset =
- tex->display_dcc_offset ? tex->display_dcc_offset
- : tex->dcc_offset;
+ tex->surface.display_dcc_offset ? tex->surface.display_dcc_offset
+ : tex->surface.dcc_offset;
assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24));
md.u.gfx9.dcc_offset_256B = dcc_offset >> 8;
/* Clear the base address and set the relative DCC offset. */
desc[0] = 0;
desc[1] &= C_008F14_BASE_ADDRESS_HI;
- desc[7] = tex->dcc_offset >> 8;
+
+ switch (sscreen->info.chip_class) {
+ case GFX6:
+ case GFX7:
+ break;
+ case GFX8:
+ desc[7] = tex->surface.dcc_offset >> 8;
+ break;
+ case GFX9:
+ desc[7] = tex->surface.dcc_offset >> 8;
+ desc[5] &= C_008F24_META_DATA_ADDRESS;
+ desc[5] |= S_008F24_META_DATA_ADDRESS(tex->surface.dcc_offset >> 40);
+ break;
+ case GFX10:
+ desc[6] &= C_00A018_META_DATA_ADDRESS_LO;
+ desc[6] |= S_00A018_META_DATA_ADDRESS_LO(tex->surface.dcc_offset >> 8);
+ desc[7] = tex->surface.dcc_offset >> 16;
+ break;
+ default:
+ assert(0);
+ }
+
/* Dwords [2:9] contain the image descriptor. */
memcpy(&md.metadata[2], desc, sizeof(desc));
sscreen->ws->buffer_set_metadata(tex->buffer.buf, &md);
}
-static void si_get_opaque_metadata(struct si_screen *sscreen,
- struct si_texture *tex,
- struct radeon_bo_metadata *md)
+static bool si_read_tex_bo_metadata(struct si_screen *sscreen,
+ struct si_texture *tex,
+ uint64_t offset,
+ struct radeon_bo_metadata *md)
{
uint32_t *desc = &md->metadata[2];
- if (sscreen->info.chip_class < GFX8)
- return;
+ if (offset || /* Non-zero planes ignore metadata. */
+ md->size_metadata < 10 * 4 || /* at least 2(header) + 8(desc) dwords */
+ md->metadata[0] == 0 || /* invalid version number */
+ md->metadata[1] != si_get_bo_metadata_word1(sscreen)) /* invalid PCI ID */ {
+ /* Disable DCC because it might not be enabled. */
+ si_texture_zero_dcc_fields(tex);
- /* Return if DCC is enabled. The texture should be set up with it
- * already.
- */
- if (md->size_metadata >= 10 * 4 && /* at least 2(header) + 8(desc) dwords */
- md->metadata[0] != 0 &&
- md->metadata[1] == si_get_bo_metadata_word1(sscreen) &&
+ /* Don't report an error if the texture comes from an incompatible driver,
+ * but this might not work.
+ */
+ return true;
+ }
+
+ /* Validate that sample counts and the number of mipmap levels match. */
+ unsigned last_level = G_008F1C_LAST_LEVEL(desc[3]);
+ unsigned type = G_008F1C_TYPE(desc[3]);
+
+ if (type == V_008F1C_SQ_RSRC_IMG_2D_MSAA ||
+ type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
+ unsigned log_samples =
+ util_logbase2(MAX2(1, tex->buffer.b.b.nr_storage_samples));
+
+ if (last_level != log_samples) {
+ fprintf(stderr, "radeonsi: invalid MSAA texture import, "
+ "metadata has log2(samples) = %u, the caller set %u\n",
+ last_level, log_samples);
+ return false;
+ }
+ } else {
+ if (last_level != tex->buffer.b.b.last_level) {
+ fprintf(stderr, "radeonsi: invalid mipmapped texture import, "
+ "metadata has last_level = %u, the caller set %u\n",
+ last_level, tex->buffer.b.b.last_level);
+ return false;
+ }
+ }
+
+ if (sscreen->info.chip_class >= GFX8 &&
G_008F28_COMPRESSION_EN(desc[6])) {
- tex->dcc_offset = (uint64_t)desc[7] << 8;
+ /* Read DCC information.
+ *
+ * Some state trackers don't set the SCANOUT flag when
+ * importing displayable images, which affects PIPE_ALIGNED
+ * and RB_ALIGNED, so we need to recover them here.
+ */
+ switch (sscreen->info.chip_class) {
+ case GFX8:
+ tex->surface.dcc_offset = (uint64_t)desc[7] << 8;
+ break;
- if (sscreen->info.chip_class >= GFX9) {
- /* Fix up parameters for displayable DCC. Some state
- * trackers don't set the SCANOUT flag when importing
- * displayable images, so we have to recover the correct
- * parameters here.
- */
+ case GFX9:
+ tex->surface.dcc_offset =
+ ((uint64_t)desc[7] << 8) |
+ ((uint64_t)G_008F24_META_DATA_ADDRESS(desc[5]) << 40);
tex->surface.u.gfx9.dcc.pipe_aligned =
G_008F24_META_PIPE_ALIGNED(desc[5]);
tex->surface.u.gfx9.dcc.rb_aligned =
if (!tex->surface.u.gfx9.dcc.pipe_aligned &&
!tex->surface.u.gfx9.dcc.rb_aligned)
tex->surface.is_displayable = true;
+ break;
+
+ case GFX10:
+ tex->surface.dcc_offset =
+ ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) |
+ ((uint64_t)desc[7] << 16);
+ tex->surface.u.gfx9.dcc.pipe_aligned =
+ G_00A018_META_PIPE_ALIGNED(desc[6]);
+ break;
+
+ default:
+ assert(0);
+ return false;
}
- return;
+ } else {
+ /* Disable DCC. dcc_offset is always set by texture_from_handle
+ * and must be cleared here.
+ */
+ si_texture_zero_dcc_fields(tex);
}
- /* Disable DCC. These are always set by texture_from_handle and must
- * be cleared here.
- */
- tex->dcc_offset = 0;
+ return true;
}
static bool si_has_displayable_dcc(struct si_texture *tex)
* (it can't be scanned out and rendered to simultaneously)
*/
if (sscreen->info.use_display_dcc_unaligned &&
- tex->dcc_offset &&
+ tex->surface.dcc_offset &&
!tex->surface.u.gfx9.dcc.pipe_aligned &&
!tex->surface.u.gfx9.dcc.rb_aligned)
return true;
/* This needs an explicit flush (flush_resource). */
if (sscreen->info.use_display_dcc_with_retile_blit &&
- tex->display_dcc_offset)
+ tex->surface.display_dcc_offset)
return true;
return false;
}
-static void si_texture_get_info(struct pipe_screen* screen,
- struct pipe_resource *resource,
- unsigned *pstride,
- unsigned *poffset)
+static bool si_resource_get_param(struct pipe_screen *screen,
+ struct pipe_context *context,
+ struct pipe_resource *resource,
+ unsigned plane,
+ unsigned layer,
+ enum pipe_resource_param param,
+ unsigned handle_usage,
+ uint64_t *value)
{
+ for (unsigned i = 0; i < plane; i++)
+ resource = resource->next;
+
struct si_screen *sscreen = (struct si_screen*)screen;
struct si_texture *tex = (struct si_texture*)resource;
- unsigned stride = 0;
- unsigned offset = 0;
+ struct winsys_handle whandle;
- if (!sscreen || !tex)
- return;
+ switch (param) {
+ case PIPE_RESOURCE_PARAM_NPLANES:
+ *value = resource->target == PIPE_BUFFER ? 1 : tex->num_planes;
+ return true;
- if (resource->target != PIPE_BUFFER) {
- if (sscreen->info.chip_class >= GFX9) {
- offset = tex->surface.u.gfx9.surf_offset;
- stride = tex->surface.u.gfx9.surf_pitch *
- tex->surface.bpe;
- } else {
- offset = tex->surface.u.legacy.level[0].offset;
- stride = tex->surface.u.legacy.level[0].nblk_x *
- tex->surface.bpe;
- }
+ case PIPE_RESOURCE_PARAM_STRIDE:
+ if (resource->target == PIPE_BUFFER)
+ *value = 0;
+ else if (sscreen->info.chip_class >= GFX9)
+ *value = tex->surface.u.gfx9.surf_pitch * tex->surface.bpe;
+ else
+ *value = tex->surface.u.legacy.level[0].nblk_x * tex->surface.bpe;
+ return true;
+
+ case PIPE_RESOURCE_PARAM_OFFSET:
+ if (resource->target == PIPE_BUFFER)
+ *value = 0;
+ else if (sscreen->info.chip_class >= GFX9)
+ *value = tex->surface.u.gfx9.surf_offset +
+ layer * tex->surface.u.gfx9.surf_slice_size;
+ else
+ *value = tex->surface.u.legacy.level[0].offset +
+ layer * (uint64_t)tex->surface.u.legacy.level[0].slice_size_dw * 4;
+ return true;
+
+ case PIPE_RESOURCE_PARAM_MODIFIER:
+ *value = DRM_FORMAT_MOD_INVALID;
+ return true;
+
+ case PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED:
+ case PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS:
+ case PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD:
+ memset(&whandle, 0, sizeof(whandle));
+
+ if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED)
+ whandle.type = WINSYS_HANDLE_TYPE_SHARED;
+ else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS)
+ whandle.type = WINSYS_HANDLE_TYPE_KMS;
+ else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD)
+ whandle.type = WINSYS_HANDLE_TYPE_FD;
+
+ if (!screen->resource_get_handle(screen, context, resource,
+ &whandle, handle_usage))
+ return false;
+
+ *value = whandle.handle;
+ return true;
}
+ return false;
+}
- if (pstride)
- *pstride = stride;
+static void si_texture_get_info(struct pipe_screen* screen,
+ struct pipe_resource *resource,
+ unsigned *pstride,
+ unsigned *poffset)
+{
+ uint64_t value;
- if (poffset)
- *poffset = offset;
+ if (pstride) {
+ si_resource_get_param(screen, NULL, resource, 0, 0,
+ PIPE_RESOURCE_PARAM_STRIDE, 0, &value);
+ *pstride = value;
+ }
+
+ if (poffset) {
+ si_resource_get_param(screen, NULL, resource, 0, 0,
+ PIPE_RESOURCE_PARAM_OFFSET, 0, &value);
+ *poffset = value;
+ }
}
static bool si_texture_get_handle(struct pipe_screen* screen,
sctx = (struct si_context*)(ctx ? ctx : sscreen->aux_context);
if (resource->target != PIPE_BUFFER) {
+ /* Individual planes are chained pipe_resource instances. */
+ for (unsigned i = 0; i < whandle->plane; i++) {
+ resource = resource->next;
+ res = si_resource(resource);
+ tex = (struct si_texture*)resource;
+ }
+
/* This is not supported now, but it might be required for OpenCL
* interop in the future.
*/
if (sscreen->ws->buffer_is_suballocated(res->buf) ||
tex->surface.tile_swizzle ||
(tex->buffer.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
- sscreen->info.has_local_buffers &&
- whandle->type != WINSYS_HANDLE_TYPE_KMS)) {
+ sscreen->info.has_local_buffers)) {
assert(!res->b.is_shared);
si_reallocate_texture_inplace(sctx, tex,
PIPE_BIND_SHARED, false);
* disable it for external clients that want write
* access.
*/
- if ((usage & PIPE_HANDLE_USAGE_SHADER_WRITE && tex->dcc_offset) ||
+ if ((usage & PIPE_HANDLE_USAGE_SHADER_WRITE && tex->surface.dcc_offset) ||
/* Displayable DCC requires an explicit flush. */
(!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
si_has_displayable_dcc(tex))) {
}
if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
- (tex->cmask_buffer || tex->dcc_offset)) {
+ (tex->cmask_buffer || tex->surface.dcc_offset)) {
/* Eliminate fast clear (both CMASK and DCC) */
si_eliminate_fast_color_clear(sctx, tex);
/* eliminate_fast_color_clear flushes the context */
}
/* Set metadata. */
- if (!res->b.is_shared || update_metadata)
+ if ((!res->b.is_shared || update_metadata) && whandle->offset == 0)
si_set_tex_bo_metadata(sscreen, tex);
if (sscreen->info.chip_class >= GFX9) {
res->external_usage = usage;
}
- return sscreen->ws->buffer_get_handle(sscreen->ws, res->buf, stride,
- offset, slice_size, whandle);
+ whandle->stride = stride;
+ whandle->offset = offset + slice_size * whandle->layer;
+
+ return sscreen->ws->buffer_get_handle(sscreen->ws, res->buf, whandle);
}
static void si_texture_destroy(struct pipe_screen *screen,
static const struct u_resource_vtbl si_texture_vtbl;
-static void si_texture_get_htile_size(struct si_screen *sscreen,
- struct si_texture *tex)
-{
- unsigned cl_width, cl_height, width, height;
- unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
- unsigned num_pipes = sscreen->info.num_tile_pipes;
-
- assert(sscreen->info.chip_class <= GFX8);
-
- tex->surface.htile_size = 0;
-
- if (tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
- !sscreen->info.htile_cmask_support_1d_tiling)
- return;
-
- /* Overalign HTILE on P2 configs to work around GPU hangs in
- * piglit/depthstencil-render-miplevels 585.
- *
- * This has been confirmed to help Kabini & Stoney, where the hangs
- * are always reproducible. I think I have seen the test hang
- * on Carrizo too, though it was very rare there.
- */
- if (sscreen->info.chip_class >= GFX7 && num_pipes < 4)
- num_pipes = 4;
-
- switch (num_pipes) {
- case 1:
- cl_width = 32;
- cl_height = 16;
- break;
- case 2:
- cl_width = 32;
- cl_height = 32;
- break;
- case 4:
- cl_width = 64;
- cl_height = 32;
- break;
- case 8:
- cl_width = 64;
- cl_height = 64;
- break;
- case 16:
- cl_width = 128;
- cl_height = 64;
- break;
- default:
- assert(0);
- return;
- }
-
- width = align(tex->surface.u.legacy.level[0].nblk_x, cl_width * 8);
- height = align(tex->surface.u.legacy.level[0].nblk_y, cl_height * 8);
-
- slice_elements = (width * height) / (8 * 8);
- slice_bytes = slice_elements * 4;
-
- pipe_interleave_bytes = sscreen->info.pipe_interleave_bytes;
- base_align = num_pipes * pipe_interleave_bytes;
-
- tex->surface.htile_alignment = base_align;
- tex->surface.htile_size =
- util_num_layers(&tex->buffer.b.b, 0) *
- align(slice_bytes, base_align);
-}
-
-static void si_texture_allocate_htile(struct si_screen *sscreen,
- struct si_texture *tex)
-{
- if (sscreen->info.chip_class <= GFX8 && !tex->tc_compatible_htile)
- si_texture_get_htile_size(sscreen, tex);
-
- if (!tex->surface.htile_size)
- return;
-
- tex->htile_offset = align(tex->size, tex->surface.htile_alignment);
- tex->size = tex->htile_offset + tex->surface.htile_size;
-}
-
void si_print_texture_info(struct si_screen *sscreen,
struct si_texture *tex, struct u_log_context *log)
{
tex->surface.u.gfx9.surf.epitch,
tex->surface.u.gfx9.surf_pitch);
- if (tex->fmask_offset) {
+ if (tex->surface.fmask_offset) {
u_log_printf(log, " FMASK: offset=%"PRIu64", size=%"PRIu64", "
"alignment=%u, swmode=%u, epitch=%u\n",
- tex->fmask_offset,
+ tex->surface.fmask_offset,
tex->surface.fmask_size,
tex->surface.fmask_alignment,
tex->surface.u.gfx9.fmask.swizzle_mode,
if (tex->cmask_buffer) {
u_log_printf(log, " CMask: offset=%"PRIu64", size=%u, "
"alignment=%u, rb_aligned=%u, pipe_aligned=%u\n",
- tex->cmask_offset,
+ tex->surface.cmask_offset,
tex->surface.cmask_size,
tex->surface.cmask_alignment,
tex->surface.u.gfx9.cmask.rb_aligned,
tex->surface.u.gfx9.cmask.pipe_aligned);
}
- if (tex->htile_offset) {
+ if (tex->surface.htile_offset) {
u_log_printf(log, " HTile: offset=%"PRIu64", size=%u, alignment=%u, "
"rb_aligned=%u, pipe_aligned=%u\n",
- tex->htile_offset,
+ tex->surface.htile_offset,
tex->surface.htile_size,
tex->surface.htile_alignment,
tex->surface.u.gfx9.htile.rb_aligned,
tex->surface.u.gfx9.htile.pipe_aligned);
}
- if (tex->dcc_offset) {
+ if (tex->surface.dcc_offset) {
u_log_printf(log, " DCC: offset=%"PRIu64", size=%u, "
"alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
- tex->dcc_offset, tex->surface.dcc_size,
+ tex->surface.dcc_offset, tex->surface.dcc_size,
tex->surface.dcc_alignment,
tex->surface.u.gfx9.display_dcc_pitch_max,
tex->surface.num_dcc_levels);
tex->surface.u.legacy.tile_split, tex->surface.u.legacy.pipe_config,
(tex->surface.flags & RADEON_SURF_SCANOUT) != 0);
- if (tex->fmask_offset)
+ if (tex->surface.fmask_offset)
u_log_printf(log, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
"bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
- tex->fmask_offset, tex->surface.fmask_size, tex->surface.fmask_alignment,
+ tex->surface.fmask_offset, tex->surface.fmask_size, tex->surface.fmask_alignment,
tex->surface.u.legacy.fmask.pitch_in_pixels,
tex->surface.u.legacy.fmask.bankh,
tex->surface.u.legacy.fmask.slice_tile_max,
if (tex->cmask_buffer)
u_log_printf(log, " CMask: offset=%"PRIu64", size=%u, alignment=%u, "
"slice_tile_max=%u\n",
- tex->cmask_offset, tex->surface.cmask_size, tex->surface.cmask_alignment,
+ tex->surface.cmask_offset, tex->surface.cmask_size, tex->surface.cmask_alignment,
tex->surface.u.legacy.cmask_slice_tile_max);
- if (tex->htile_offset)
+ if (tex->surface.htile_offset)
u_log_printf(log, " HTile: offset=%"PRIu64", size=%u, "
"alignment=%u, TC_compatible = %u\n",
- tex->htile_offset, tex->surface.htile_size,
+ tex->surface.htile_offset, tex->surface.htile_size,
tex->surface.htile_alignment,
tex->tc_compatible_htile);
- if (tex->dcc_offset) {
+ if (tex->surface.dcc_offset) {
u_log_printf(log, " DCC: offset=%"PRIu64", size=%u, alignment=%u\n",
- tex->dcc_offset, tex->surface.dcc_size,
+ tex->surface.dcc_offset, tex->surface.dcc_size,
tex->surface.dcc_alignment);
for (i = 0; i <= tex->buffer.b.b.last_level; i++)
u_log_printf(log, " DCCLevel[%i]: enabled=%u, offset=%u, "
}
}
-/* Common processing for si_texture_create and si_texture_from_handle */
+/**
+ * Common function for si_texture_create and si_texture_from_handle.
+ *
+ * \param screen screen
+ * \param base resource template
+ * \param surface radeon_surf
+ * \param plane0 if a non-zero plane is being created, this is the first plane
+ * \param imported_buf from si_texture_from_handle
+ * \param offset offset for non-zero planes or imported buffers
+ * \param alloc_size the size to allocate if plane0 == NULL (i.e. this is the
+ *                   first plane and the buffer is not imported)
+ * \param alignment alignment for the allocation
+ */
static struct si_texture *
si_texture_create_object(struct pipe_screen *screen,
const struct pipe_resource *base,
- struct pb_buffer *buf,
- struct radeon_surf *surface)
+ const struct radeon_surf *surface,
+ const struct si_texture *plane0,
+ struct pb_buffer *imported_buf,
+ uint64_t offset,
+ uint64_t alloc_size,
+ unsigned alignment)
{
struct si_texture *tex;
struct si_resource *resource;
/* don't include stencil-only formats which we don't support for rendering */
tex->is_depth = util_format_has_depth(util_format_description(tex->buffer.b.b.format));
-
tex->surface = *surface;
- tex->size = tex->surface.surf_size;
-
tex->tc_compatible_htile = tex->surface.htile_size != 0 &&
(tex->surface.flags &
RADEON_SURF_TC_COMPATIBLE_HTILE);
*/
tex->ps_draw_ratio = 0;
+ if (sscreen->info.chip_class >= GFX9) {
+ tex->surface.u.gfx9.surf_offset = offset;
+ } else {
+ for (unsigned i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
+ tex->surface.u.legacy.level[i].offset += offset;
+ }
+
if (tex->is_depth) {
if (sscreen->info.chip_class >= GFX9) {
tex->can_sample_z = true;
tex->can_sample_s = !tex->surface.u.legacy.stencil_adjusted;
}
- if (!(base->flags & (SI_RESOURCE_FLAG_TRANSFER |
- SI_RESOURCE_FLAG_FLUSHED_DEPTH))) {
- tex->db_compatible = true;
-
- if (!(sscreen->debug_flags & DBG(NO_HYPERZ)))
- si_texture_allocate_htile(sscreen, tex);
- }
+ tex->db_compatible = surface->flags & RADEON_SURF_ZBUFFER;
} else {
- if (base->nr_samples > 1 &&
- !buf &&
- !(sscreen->debug_flags & DBG(NO_FMASK))) {
- /* Allocate FMASK. */
- tex->fmask_offset = align64(tex->size,
- tex->surface.fmask_alignment);
- tex->size = tex->fmask_offset + tex->surface.fmask_size;
-
- /* Allocate CMASK. */
- tex->cmask_offset = align64(tex->size, tex->surface.cmask_alignment);
- tex->size = tex->cmask_offset + tex->surface.cmask_size;
+ if (tex->surface.cmask_offset) {
tex->cb_color_info |= S_028C70_FAST_CLEAR(1);
tex->cmask_buffer = &tex->buffer;
-
- if (!tex->surface.fmask_size || !tex->surface.cmask_size)
- goto error;
- }
-
- /* Shared textures must always set up DCC here.
- * If it's not present, it will be disabled by
- * si_get_opaque_metadata later.
- */
- if (tex->surface.dcc_size &&
- (buf || !(sscreen->debug_flags & DBG(NO_DCC))) &&
- (sscreen->info.use_display_dcc_unaligned ||
- sscreen->info.use_display_dcc_with_retile_blit ||
- !(tex->surface.flags & RADEON_SURF_SCANOUT))) {
- /* Add space for the DCC buffer. */
- tex->dcc_offset = align64(tex->size, tex->surface.dcc_alignment);
- tex->size = tex->dcc_offset + tex->surface.dcc_size;
-
- if (sscreen->info.chip_class >= GFX9 &&
- tex->surface.u.gfx9.dcc_retile_num_elements) {
- /* Add space for the displayable DCC buffer. */
- tex->display_dcc_offset =
- align64(tex->size, tex->surface.u.gfx9.display_dcc_alignment);
- tex->size = tex->display_dcc_offset +
- tex->surface.u.gfx9.display_dcc_size;
-
- /* Add space for the DCC retile buffer. (16-bit or 32-bit elements) */
- tex->dcc_retile_map_offset =
- align64(tex->size, sscreen->info.tcc_cache_line_size);
-
- if (tex->surface.u.gfx9.dcc_retile_use_uint16) {
- tex->size = tex->dcc_retile_map_offset +
- tex->surface.u.gfx9.dcc_retile_num_elements * 2;
- } else {
- tex->size = tex->dcc_retile_map_offset +
- tex->surface.u.gfx9.dcc_retile_num_elements * 4;
- }
- }
}
}
- /* Now create the backing buffer. */
- if (!buf) {
- si_init_resource_fields(sscreen, resource, tex->size,
- tex->surface.surf_alignment);
+ if (plane0) {
+ /* The buffer is shared with the first plane. */
+ resource->bo_size = plane0->buffer.bo_size;
+ resource->bo_alignment = plane0->buffer.bo_alignment;
+ resource->flags = plane0->buffer.flags;
+ resource->domains = plane0->buffer.domains;
+ resource->vram_usage = plane0->buffer.vram_usage;
+ resource->gart_usage = plane0->buffer.gart_usage;
+
+ pb_reference(&resource->buf, plane0->buffer.buf);
+ resource->gpu_address = plane0->buffer.gpu_address;
+ } else if (!(surface->flags & RADEON_SURF_IMPORTED)) {
+ /* Create the backing buffer. */
+ si_init_resource_fields(sscreen, resource, alloc_size, alignment);
if (!si_alloc_resource(sscreen, resource))
goto error;
} else {
- resource->buf = buf;
+ resource->buf = imported_buf;
resource->gpu_address = sscreen->ws->buffer_get_virtual_address(resource->buf);
- resource->bo_size = buf->size;
- resource->bo_alignment = buf->alignment;
+ resource->bo_size = imported_buf->size;
+ resource->bo_alignment = imported_buf->alignment;
resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf);
if (resource->domains & RADEON_DOMAIN_VRAM)
- resource->vram_usage = buf->size;
+ resource->vram_usage = resource->bo_size;
else if (resource->domains & RADEON_DOMAIN_GTT)
- resource->gart_usage = buf->size;
+ resource->gart_usage = resource->bo_size;
}
if (tex->cmask_buffer) {
/* Initialize the cmask to 0xCC (= compressed state). */
si_screen_clear_buffer(sscreen, &tex->cmask_buffer->b.b,
- tex->cmask_offset, tex->surface.cmask_size,
+ tex->surface.cmask_offset, tex->surface.cmask_size,
0xCCCCCCCC);
}
- if (tex->htile_offset) {
+ if (tex->surface.htile_offset) {
uint32_t clear_value = 0;
if (sscreen->info.chip_class >= GFX9 || tex->tc_compatible_htile)
clear_value = 0x0000030F;
si_screen_clear_buffer(sscreen, &tex->buffer.b.b,
- tex->htile_offset,
+ tex->surface.htile_offset,
tex->surface.htile_size,
clear_value);
}
/* Initialize DCC only if the texture is not being imported. */
- if (!buf && tex->dcc_offset) {
+ if (!(surface->flags & RADEON_SURF_IMPORTED) && tex->surface.dcc_offset) {
/* Clear DCC to black for all tiles with DCC enabled.
*
* This fixes corruption in 3DMark Slingshot Extreme, which
tex->buffer.b.b.nr_samples <= 2) {
/* Simple case - all tiles have DCC enabled. */
si_screen_clear_buffer(sscreen, &tex->buffer.b.b,
- tex->dcc_offset,
+ tex->surface.dcc_offset,
tex->surface.dcc_size,
DCC_CLEAR_COLOR_0000);
} else if (sscreen->info.chip_class >= GFX9) {
/* Clear to uncompressed. Clearing this to black is complicated. */
si_screen_clear_buffer(sscreen, &tex->buffer.b.b,
- tex->dcc_offset,
+ tex->surface.dcc_offset,
tex->surface.dcc_size,
DCC_UNCOMPRESSED);
} else {
if (tex->buffer.b.b.nr_samples >= 2) {
/* Clearing this to black is complicated. */
si_screen_clear_buffer(sscreen, &tex->buffer.b.b,
- tex->dcc_offset,
+ tex->surface.dcc_offset,
tex->surface.dcc_size,
DCC_UNCOMPRESSED);
} else {
/* Mipmap levels with DCC. */
if (size) {
si_screen_clear_buffer(sscreen, &tex->buffer.b.b,
- tex->dcc_offset, size,
+ tex->surface.dcc_offset, size,
DCC_CLEAR_COLOR_0000);
}
/* Mipmap levels without DCC. */
if (size != tex->surface.dcc_size) {
si_screen_clear_buffer(sscreen, &tex->buffer.b.b,
- tex->dcc_offset + size,
+ tex->surface.dcc_offset + size,
tex->surface.dcc_size - size,
DCC_UNCOMPRESSED);
}
}
}
- /* Upload the DCC retile map. */
- if (tex->dcc_retile_map_offset) {
- /* Use a staging buffer for the upload, because
+ /* Initialize displayable DCC that requires the retile blit. */
+ if (tex->surface.dcc_retile_map_offset) {
+ /* Uninitialized DCC can hang the display hw.
+ * Clear to white to indicate that. */
+ si_screen_clear_buffer(sscreen, &tex->buffer.b.b,
+ tex->surface.display_dcc_offset,
+ tex->surface.u.gfx9.display_dcc_size,
+ DCC_CLEAR_COLOR_1111);
+
+ /* Upload the DCC retile map.
+ * Use a staging buffer for the upload, because
* the buffer backing the texture is unmappable.
*/
bool use_uint16 = tex->surface.u.gfx9.dcc_retile_use_uint16;
struct pipe_box box;
u_box_1d(0, buf->b.b.width0, &box);
- assert(tex->dcc_retile_map_offset <= UINT_MAX);
- mtx_lock(&sscreen->aux_context_lock);
+ assert(tex->surface.dcc_retile_map_offset <= UINT_MAX);
+ simple_mtx_lock(&sscreen->aux_context_lock);
sctx->dma_copy(&sctx->b, &tex->buffer.b.b, 0,
- tex->dcc_retile_map_offset, 0, 0,
+ tex->surface.dcc_retile_map_offset, 0, 0,
&buf->b.b, 0, &box);
sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
- mtx_unlock(&sscreen->aux_context_lock);
+ simple_mtx_unlock(&sscreen->aux_context_lock);
si_resource_reference(&buf, NULL);
}
/* Initialize the CMASK base register value. */
tex->cmask_base_address_reg =
- (tex->buffer.gpu_address + tex->cmask_offset) >> 8;
+ (tex->buffer.gpu_address + tex->surface.cmask_offset) >> 8;
if (sscreen->debug_flags & DBG(VM)) {
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
}
}
- struct radeon_surf surface = {0};
- bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH;
+ bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH ||
+ templ->flags & SI_RESOURCE_FLAG_TRANSFER;
bool tc_compatible_htile =
sscreen->info.chip_class >= GFX8 &&
/* There are issues with TC-compatible HTILE on Tonga (and
!is_flushed_depth &&
templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
is_zs;
- int r;
+ enum radeon_surf_mode tile_mode = si_choose_tiling(sscreen, templ,
+ tc_compatible_htile);
+
+ /* This allocates textures with multiple planes like NV12 in 1 buffer. */
+ enum { SI_TEXTURE_MAX_PLANES = 3 };
+ struct radeon_surf surface[SI_TEXTURE_MAX_PLANES] = {};
+ struct pipe_resource plane_templ[SI_TEXTURE_MAX_PLANES];
+ uint64_t plane_offset[SI_TEXTURE_MAX_PLANES] = {};
+ uint64_t total_size = 0;
+ unsigned max_alignment = 0;
+ unsigned num_planes = util_format_get_num_planes(templ->format);
+ assert(num_planes <= SI_TEXTURE_MAX_PLANES);
+
+ /* Compute texture or plane layouts and offsets. */
+ for (unsigned i = 0; i < num_planes; i++) {
+ plane_templ[i] = *templ;
+ plane_templ[i].format = util_format_get_plane_format(templ->format, i);
+ plane_templ[i].width0 = util_format_get_plane_width(templ->format, i, templ->width0);
+ plane_templ[i].height0 = util_format_get_plane_height(templ->format, i, templ->height0);
+
+ /* Multi-plane allocations need PIPE_BIND_SHARED, because we can't
+ * reallocate the storage to add PIPE_BIND_SHARED, because it's
+ * shared by 3 pipe_resources.
+ */
+ if (num_planes > 1)
+ plane_templ[i].bind |= PIPE_BIND_SHARED;
- r = si_init_surface(sscreen, &surface, templ,
- si_choose_tiling(sscreen, templ, tc_compatible_htile),
- 0, 0, false, false, is_flushed_depth,
- tc_compatible_htile);
- if (r) {
- return NULL;
+ if (si_init_surface(sscreen, &surface[i], &plane_templ[i],
+ tile_mode, 0, false, false,
+ is_flushed_depth, tc_compatible_htile))
+ return NULL;
+
+ plane_offset[i] = align64(total_size, surface[i].surf_alignment);
+ total_size = plane_offset[i] + surface[i].total_size;
+ max_alignment = MAX2(max_alignment, surface[i].surf_alignment);
}
- return (struct pipe_resource *)
- si_texture_create_object(screen, templ, NULL, &surface);
+ struct si_texture *plane0 = NULL, *last_plane = NULL;
+
+ for (unsigned i = 0; i < num_planes; i++) {
+ struct si_texture *tex =
+ si_texture_create_object(screen, &plane_templ[i], &surface[i],
+ plane0, NULL, plane_offset[i],
+ total_size, max_alignment);
+ if (!tex) {
+ si_texture_reference(&plane0, NULL);
+ return NULL;
+ }
+
+ tex->plane_index = i;
+ tex->num_planes = num_planes;
+
+ if (!last_plane)
+ plane0 = last_plane = tex;
+ else
+ last_plane->buffer.b.b.next = &tex->buffer.b.b;
+ }
+
+ return (struct pipe_resource *)plane0;
}
static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *sscreen,
bool is_scanout;
int r;
+ /* Ignore metadata for non-zero planes. */
+ if (offset != 0)
+ dedicated = false;
+
if (dedicated) {
sscreen->ws->buffer_get_metadata(buf, &metadata);
si_get_display_metadata(sscreen, &surface, &metadata,
}
r = si_init_surface(sscreen, &surface, templ,
- array_mode, stride, offset, true, is_scanout,
+ array_mode, stride, true, is_scanout,
false, false);
if (r)
return NULL;
- tex = si_texture_create_object(&sscreen->b, templ, buf, &surface);
+ tex = si_texture_create_object(&sscreen->b, templ, &surface, NULL, buf,
+ offset, 0, 0);
if (!tex)
return NULL;
tex->buffer.b.is_shared = true;
tex->buffer.external_usage = usage;
+ tex->num_planes = 1;
- si_get_opaque_metadata(sscreen, tex, &metadata);
+ if (!si_read_tex_bo_metadata(sscreen, tex, offset, &metadata)) {
+ si_texture_reference(&tex, NULL);
+ return NULL;
+ }
/* Displayable DCC requires an explicit flush. */
- if (dedicated &&
+ if (dedicated && offset == 0 &&
!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
si_has_displayable_dcc(tex)) {
/* TODO: do we need to decompress DCC? */
{
struct si_screen *sscreen = (struct si_screen*)screen;
struct pb_buffer *buf = NULL;
- unsigned stride = 0, offset = 0;
/* Support only 2D textures without mipmaps */
- if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
- templ->depth0 != 1 || templ->last_level != 0)
+ if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT &&
+ templ->target != PIPE_TEXTURE_2D_ARRAY) ||
+ templ->last_level != 0)
return NULL;
buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle,
- sscreen->info.max_alignment,
- &stride, &offset);
+ sscreen->info.max_alignment);
if (!buf)
return NULL;
- return si_texture_from_winsys_buffer(sscreen, templ, buf, stride,
- offset, usage, true);
+ return si_texture_from_winsys_buffer(sscreen, templ, buf,
+ whandle->stride, whandle->offset,
+ usage, true);
}
bool si_init_flushed_depth_texture(struct pipe_context *ctx,
const struct pipe_box *box)
{
return !tex->buffer.b.is_shared &&
+ !(tex->surface.flags & RADEON_SURF_IMPORTED) &&
!(transfer_usage & PIPE_TRANSFER_READ) &&
tex->buffer.b.b.last_level == 0 &&
util_texrange_covers_whole_level(&tex->buffer.b.b, 0,
/* Initialize the CMASK base address (needed even without CMASK). */
tex->cmask_base_address_reg =
- (tex->buffer.gpu_address + tex->cmask_offset) >> 8;
+ (tex->buffer.gpu_address + tex->surface.cmask_offset) >> 8;
p_atomic_inc(&sscreen->dirty_tex_counter);
- sctx->num_alloc_tex_transfer_bytes += tex->size;
+ sctx->num_alloc_tex_transfer_bytes += tex->surface.total_size;
}
static void *si_texture_transfer_map(struct pipe_context *ctx,
assert(sctx->chip_class >= GFX8);
- if (tex->dcc_offset)
+ if (tex->surface.dcc_offset)
return; /* already enabled */
/* Enable the DCC stat gathering. */
}
/* dcc_offset is the absolute GPUVM address. */
- tex->dcc_offset = tex->dcc_separate_buffer->gpu_address;
+ tex->surface.dcc_offset = tex->dcc_separate_buffer->gpu_address;
/* no need to flag anything since this is called by fast clear that
* flags framebuffer state
assert(!tex->last_dcc_separate_buffer);
tex->last_dcc_separate_buffer = tex->dcc_separate_buffer;
tex->dcc_separate_buffer = NULL;
- tex->dcc_offset = 0;
+ tex->surface.dcc_offset = 0;
/* no need to flag anything since this is called after
* decompression that re-sets framebuffer state
*/
struct si_screen *sscreen = (struct si_screen*)screen;
struct si_memory_object *memobj = CALLOC_STRUCT(si_memory_object);
struct pb_buffer *buf = NULL;
- uint32_t stride, offset;
if (!memobj)
return NULL;
buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle,
- sscreen->info.max_alignment,
- &stride, &offset);
+ sscreen->info.max_alignment);
if (!buf) {
free(memobj);
return NULL;
memobj->b.dedicated = dedicated;
memobj->buf = buf;
- memobj->stride = stride;
+ memobj->stride = whandle->stride;
return (struct pipe_memory_object *)memobj;
{
sscreen->b.resource_from_handle = si_texture_from_handle;
sscreen->b.resource_get_handle = si_texture_get_handle;
+ sscreen->b.resource_get_param = si_resource_get_param;
sscreen->b.resource_get_info = si_texture_get_info;
sscreen->b.resource_from_memobj = si_texture_from_memobj;
sscreen->b.memobj_create_from_handle = si_memobj_from_handle;