#include <errno.h>
#include <inttypes.h>
-static void r600_texture_discard_dcc(struct r600_common_screen *rscreen,
- struct r600_texture *rtex);
static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex);
+static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
+ const struct pipe_resource *templ);
bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
/* DCC as:
* src: Use the 3D path. DCC decompression is expensive.
- * dst: If overwriting the whole texture, discard DCC and use SDMA.
- * Otherwise, use the 3D path.
+ * dst: Use the 3D path to compress the pixels with DCC.
*/
- if (rsrc->dcc_offset)
+ if ((rsrc->dcc_offset && rsrc->surface.level[src_level].dcc_enabled) ||
+ (rdst->dcc_offset && rdst->surface.level[dst_level].dcc_enabled))
return false;
- if (rdst->dcc_offset) {
- /* We can't discard DCC if the texture has been exported.
- * We can only discard DCC for the entire texture.
- */
- if (rdst->resource.is_shared ||
- rdst->resource.b.b.last_level > 0 ||
- !util_texrange_covers_whole_level(&rdst->resource.b.b, dst_level,
- dstx, dsty, dstz, src_box->width,
- src_box->height, src_box->depth))
- return false;
-
- r600_texture_discard_dcc(rctx->screen, rdst);
- }
-
/* CMASK as:
* src: Both texture and SDMA paths need decompression. Use SDMA.
* dst: If overwriting the whole texture, discard CMASK and use
default:
return -EINVAL;
}
- if (ptex->bind & PIPE_BIND_SCANOUT) {
- surface->flags |= RADEON_SURF_SCANOUT;
- }
if (!is_flushed_depth && is_depth) {
surface->flags |= RADEON_SURF_ZBUFFER;
if (rscreen->chip_class >= SI) {
surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
}
+
+ if (rscreen->chip_class >= VI &&
+ (ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC ||
+ ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT))
+ surface->flags |= RADEON_SURF_DISABLE_DCC;
+
+ if (ptex->bind & PIPE_BIND_SCANOUT) {
+ /* This should catch bugs in gallium users setting incorrect flags. */
+ assert(surface->nsamples == 1 &&
+ surface->array_size == 1 &&
+ surface->npix_z == 1 &&
+ surface->last_level == 0 &&
+ !(surface->flags & RADEON_SURF_Z_OR_SBUFFER));
+
+ surface->flags |= RADEON_SURF_SCANOUT;
+ }
return 0;
}
rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);
if (rtex->cmask_buffer != &rtex->resource)
- pipe_resource_reference((struct pipe_resource**)&rtex->cmask_buffer, NULL);
+ r600_resource_reference(&rtex->cmask_buffer, NULL);
/* Notify all contexts about the change. */
r600_dirty_all_framebuffer_states(rscreen);
p_atomic_inc(&rscreen->compressed_colortex_counter);
}
-static void r600_texture_discard_dcc(struct r600_common_screen *rscreen,
+static bool r600_can_disable_dcc(struct r600_texture *rtex)
+{
+ /* We can't disable DCC if it can be written by another process. */
+ return rtex->dcc_offset &&
+ (!rtex->resource.is_shared ||
+ !(rtex->resource.external_usage & PIPE_HANDLE_USAGE_WRITE));
+}
+
+static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
+ if (!r600_can_disable_dcc(rtex))
+ return false;
+
/* Disable DCC. */
rtex->dcc_offset = 0;
- rtex->cb_color_info &= ~VI_S_028C70_DCC_ENABLE(1);
/* Notify all contexts about the change. */
r600_dirty_all_framebuffer_states(rscreen);
+ return true;
}
-void r600_texture_disable_dcc(struct r600_common_screen *rscreen,
+bool r600_texture_disable_dcc(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
struct r600_common_context *rctx =
(struct r600_common_context *)rscreen->aux_context;
- if (!rtex->dcc_offset)
- return;
+ if (!r600_can_disable_dcc(rtex))
+ return false;
/* Decompress DCC. */
pipe_mutex_lock(rscreen->aux_context_lock);
rctx->b.flush(&rctx->b, NULL, 0);
pipe_mutex_unlock(rscreen->aux_context_lock);
- r600_texture_discard_dcc(rscreen, rtex);
+ return r600_texture_discard_dcc(rscreen, rtex);
+}
+
+static void r600_degrade_tile_mode_to_linear(struct r600_common_context *rctx,
+ struct r600_texture *rtex,
+ bool invalidate_storage)
+{
+ struct pipe_screen *screen = rctx->b.screen;
+ struct r600_texture *new_tex;
+ struct pipe_resource templ = rtex->resource.b.b;
+ unsigned i;
+
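+ /* PIPE_BIND_LINEAR makes resource_create below choose a linear layout. */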
+ templ.bind |= PIPE_BIND_LINEAR;
+
+ /* r600g doesn't react to dirty_tex_descriptor_counter */
+ if (rctx->chip_class < SI)
+ return;
+
+ if (rtex->resource.is_shared ||
+ rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED)
+ return;
+
+ /* This fails with MSAA, depth, and compressed textures. */
+ if (r600_choose_tiling(rctx->screen, &templ) !=
+ RADEON_SURF_MODE_LINEAR_ALIGNED)
+ return;
+
+ new_tex = (struct r600_texture*)screen->resource_create(screen, &templ);
+ if (!new_tex)
+ return;
+
+ /* Copy the pixels to the new texture. */
+ if (!invalidate_storage) {
+ for (i = 0; i <= templ.last_level; i++) {
+ struct pipe_box box;
+
+ u_box_3d(0, 0, 0,
+ u_minify(templ.width0, i), u_minify(templ.height0, i),
+ util_max_layer(&templ, i) + 1, &box);
+
+ rctx->dma_copy(&rctx->b, &new_tex->resource.b.b, i, 0, 0, 0,
+ &rtex->resource.b.b, i, &box);
+ }
+ }
+
+ r600_texture_discard_cmask(rctx->screen, rtex);
+ r600_texture_discard_dcc(rctx->screen, rtex);
+
+ /* Replace the structure fields of rtex. */
+ rtex->resource.b.b.bind = templ.bind;
+ pb_reference(&rtex->resource.buf, new_tex->resource.buf);
+ rtex->resource.gpu_address = new_tex->resource.gpu_address;
+ rtex->resource.domains = new_tex->resource.domains;
+ rtex->size = new_tex->size;
+ rtex->surface = new_tex->surface;
+ rtex->non_disp_tiling = new_tex->non_disp_tiling;
+ rtex->cb_color_info = new_tex->cb_color_info;
+ rtex->cmask = new_tex->cmask; /* needed even without CMASK */
+
+ assert(!rtex->htile_buffer);
+ assert(!rtex->cmask.size);
+ assert(!rtex->fmask.size);
+ assert(!rtex->dcc_offset);
+ assert(!rtex->is_depth);
+
+ pipe_resource_reference((struct pipe_resource**)&new_tex, NULL);
+
+ r600_dirty_all_framebuffer_states(rctx->screen);
+ p_atomic_inc(&rctx->screen->dirty_tex_descriptor_counter);
}
static boolean r600_texture_get_handle(struct pipe_screen* screen,
* access.
*/
if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
- r600_texture_disable_dcc(rscreen, rtex);
- update_metadata = true;
+ if (r600_texture_disable_dcc(rscreen, rtex))
+ update_metadata = true;
}
if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
* to be called.
*/
r600_texture_discard_cmask(rscreen, rtex);
- update_metadata = true;
}
/* Set metadata. */
if (rtex->flushed_depth_texture)
pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
- pipe_resource_reference((struct pipe_resource**)&rtex->htile_buffer, NULL);
+ r600_resource_reference(&rtex->htile_buffer, NULL);
if (rtex->cmask_buffer != &rtex->resource) {
- pipe_resource_reference((struct pipe_resource**)&rtex->cmask_buffer, NULL);
+ r600_resource_reference(&rtex->cmask_buffer, NULL);
}
pb_reference(&resource->buf, NULL);
FREE(rtex);
rtex->dcc_offset, rtex->surface.dcc_size,
rtex->surface.dcc_alignment);
for (i = 0; i <= rtex->surface.last_level; i++)
- fprintf(f, " DCCLevel[%i]: offset=%"PRIu64"\n",
- i, rtex->surface.level[i].dcc_offset);
+ fprintf(f, " DCCLevel[%i]: enabled=%u, offset=%"PRIu64", "
+ "fast_clear_size=%"PRIu64"\n",
+ i, rtex->surface.level[i].dcc_enabled,
+ rtex->surface.level[i].dcc_offset,
+ rtex->surface.level[i].dcc_fast_clear_size);
}
for (i = 0; i <= rtex->surface.last_level; i++)
* This must be done after r600_setup_surface.
* Applies to R600-Cayman. */
rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
+ /* Applies to GCN. */
+ rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
if (rtex->is_depth) {
if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
}
}
- if (!buf && rtex->surface.dcc_size &&
- !(rscreen->debug_flags & DBG_NO_DCC)) {
+ /* Shared textures must always set up DCC here.
+ * If it's not present, it will be disabled by
+ * apply_opaque_metadata later.
+ */
+ if (rtex->surface.dcc_size &&
+ (buf || !(rscreen->debug_flags & DBG_NO_DCC))) {
/* Reserve space for the DCC buffer. */
rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
- rtex->cb_color_info |= VI_S_028C70_DCC_ENABLE(1);
}
}
rtex->cmask.offset, rtex->cmask.size,
0xCCCCCCCC, R600_COHERENCY_NONE);
}
- if (rtex->dcc_offset) {
+
+ /* Initialize DCC only if the texture is not being imported. */
+ if (!buf && rtex->dcc_offset) {
r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
rtex->dcc_offset,
rtex->surface.dcc_size,
rtex->resource.is_shared = true;
rtex->resource.external_usage = usage;
+
+ if (rscreen->apply_opaque_metadata)
+ rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
+
return &rtex->resource.b.b;
}
return false;
}
- (*flushed_depth_texture)->is_flushing_texture = TRUE;
+ (*flushed_depth_texture)->is_flushing_texture = true;
(*flushed_depth_texture)->non_disp_tiling = false;
return true;
}
}
}
+static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ unsigned transfer_usage,
+ const struct pipe_box *box)
+{
+ /* r600g doesn't react to dirty_tex_descriptor_counter */
+ return rscreen->chip_class >= SI &&
+ !rtex->resource.is_shared &&
+ !(transfer_usage & PIPE_TRANSFER_READ) &&
+ rtex->resource.b.b.last_level == 0 &&
+ util_texrange_covers_whole_level(&rtex->resource.b.b, 0,
+ box->x, box->y, box->z,
+ box->width, box->height,
+ box->depth);
+}
+
+static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
+ struct r600_texture *rtex)
+{
+ struct r600_common_screen *rscreen = rctx->screen;
+
+ /* There is no point in discarding depth or tiled buffers. */
+ assert(!rtex->is_depth);
+ assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED);
+
+ /* Reallocate the buffer in the same pipe_resource. */
+ r600_init_resource(rscreen, &rtex->resource, rtex->size,
+ rtex->surface.bo_alignment);
+
+ /* Initialize the CMASK base address (needed even without CMASK). */
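+ /* The hardware register takes this address in 256-byte units, hence the shift. */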
+ rtex->cmask.base_address_reg =
+ (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
+
+ r600_dirty_all_framebuffer_states(rscreen);
+ p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
+
+ rctx->num_alloc_tex_transfer_bytes += rtex->size;
+}
+
static void *r600_texture_transfer_map(struct pipe_context *ctx,
struct pipe_resource *texture,
unsigned level,
/* Depth textures use staging unconditionally. */
if (!rtex->is_depth) {
+ /* Degrade the tile mode if we get too many transfers on APUs.
+ * On dGPUs, the staging texture is always faster.
+ * Only count uploads that are at least 4x4 pixels large.
+ */
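+ /* The count of 10 level-0 transfers below is an arbitrary heuristic threshold. */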
+ if (!rctx->screen->info.has_dedicated_vram &&
+ level == 0 &&
+ box->width >= 4 && box->height >= 4 &&
+ p_atomic_inc_return(&rtex->num_level0_transfers) == 10) {
+ bool can_invalidate =
+ r600_can_invalidate_texture(rctx->screen, rtex,
+ usage, box);
+
+ r600_degrade_tile_mode_to_linear(rctx, rtex,
+ can_invalidate);
+ }
+
/* Tiled textures need to be converted into a linear texture for CPU
* access. The staging texture is always linear and is placed in GART.
*
else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf,
RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(rtex->resource.buf, 0,
- RADEON_USAGE_READWRITE))
- use_staging_texture = true;
+ RADEON_USAGE_READWRITE)) {
+ /* It's busy. */
+ if (r600_can_invalidate_texture(rctx->screen, rtex,
+ usage, box))
+ r600_texture_invalidate_storage(rctx, rtex);
+ else
+ use_staging_texture = true;
+ }
}
trans = CALLOC_STRUCT(r600_transfer);
}
if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
- pipe_resource_reference((struct pipe_resource**)&trans->staging, NULL);
+ r600_resource_reference(&trans->staging, NULL);
FREE(trans);
return NULL;
}
static void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer)
{
+ struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct pipe_resource *texture = transfer->resource;
struct r600_texture *rtex = (struct r600_texture*)texture;
}
}
- if (rtransfer->staging)
- pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
+ if (rtransfer->staging) {
+ rctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size;
+ r600_resource_reference(&rtransfer->staging, NULL);
+ }
+
+ /* Heuristic for {upload, draw, upload, draw, ..}:
+ *
+ * Flush the gfx IB if we've allocated too much texture storage.
+ *
+ * The idea is that we don't want to build IBs that use too much
+ * memory and put pressure on the kernel memory manager and we also
+ * want to make temporary and invalidated buffers go idle ASAP to
+ * decrease the total memory usage or make them reusable. The memory
+ * usage will be slightly higher than given here because of the buffer
+ * cache in the winsys.
+ *
+ * The result is that the kernel memory manager is never a bottleneck.
+ */
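+ /* Using 1/4 of the GART size as the limit is a heuristic cutoff. */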
+ if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.gart_size / 4) {
+ rctx->gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+ rctx->num_alloc_tex_transfer_bytes = 0;
+ }
FREE(transfer);
}
struct pipe_surface *surface)
{
struct r600_surface *surf = (struct r600_surface*)surface;
- pipe_resource_reference((struct pipe_resource**)&surf->cb_buffer_fmask, NULL);
- pipe_resource_reference((struct pipe_resource**)&surf->cb_buffer_cmask, NULL);
+ r600_resource_reference(&surf->cb_buffer_fmask, NULL);
+ r600_resource_reference(&surf->cb_buffer_cmask, NULL);
pipe_resource_reference(&surface->texture, NULL);
FREE(surface);
}
surface_format == PIPE_FORMAT_B5G6R5_SRGB) {
extra_channel = -1;
} else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
- if(r600_translate_colorswap(surface_format, FALSE) <= 1)
+ if (r600_translate_colorswap(surface_format, false) <= 1)
extra_channel = desc->nr_channels - 1;
else
extra_channel = 0;
*reset_value |= 0x40404040U;
}
+void vi_dcc_clear_level(struct r600_common_context *rctx,
+ struct r600_texture *rtex,
+ unsigned level, unsigned clear_value)
+{
+ struct pipe_resource *dcc_buffer = &rtex->resource.b.b;
+ uint64_t dcc_offset = rtex->dcc_offset +
+ rtex->surface.level[level].dcc_offset;
+
+ assert(rtex->dcc_offset && rtex->surface.level[level].dcc_enabled);
+
+ rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset,
+ rtex->surface.level[level].dcc_fast_clear_size,
+ clear_value, R600_COHERENCY_CB_META);
+}
+
+/* Set the same micro tile mode as the destination of the last MSAA resolve.
+ * This allows hitting the MSAA resolve fast path, which requires that both
+ * src and dst micro tile modes match.
+ */
+static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex)
+{
+ if (rtex->resource.is_shared ||
+ rtex->surface.nsamples <= 1 ||
+ rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
+ return;
+
+ assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_2D);
+ assert(rtex->surface.last_level == 0);
+
+ /* These magic numbers were copied from addrlib, which doesn't define
+ * named constants for them either. They are all 2D_TILED_THIN1 modes
+ * with a different bpp and micro tile mode.
+ */
+ if (rscreen->chip_class >= CIK) {
+ switch (rtex->last_msaa_resolve_target_micro_mode) {
+ case 0: /* displayable */
+ rtex->surface.tiling_index[0] = 10;
+ break;
+ case 1: /* thin */
+ rtex->surface.tiling_index[0] = 14;
+ break;
+ case 3: /* rotated */
+ rtex->surface.tiling_index[0] = 28;
+ break;
+ default: /* depth, thick */
+ assert(!"unexpected micro mode");
+ return;
+ }
+ } else { /* SI */
+ switch (rtex->last_msaa_resolve_target_micro_mode) {
+ case 0: /* displayable */
+ switch (rtex->surface.bpe) {
+ case 8:
+ rtex->surface.tiling_index[0] = 10;
+ break;
+ case 16:
+ rtex->surface.tiling_index[0] = 11;
+ break;
+ default: /* 32, 64 */
+ rtex->surface.tiling_index[0] = 12;
+ break;
+ }
+ break;
+ case 1: /* thin */
+ switch (rtex->surface.bpe) {
+ case 8:
+ rtex->surface.tiling_index[0] = 14;
+ break;
+ case 16:
+ rtex->surface.tiling_index[0] = 15;
+ break;
+ case 32:
+ rtex->surface.tiling_index[0] = 16;
+ break;
+ default: /* 64, 128 */
+ rtex->surface.tiling_index[0] = 17;
+ break;
+ }
+ break;
+ default: /* depth, thick */
+ assert(!"unexpected micro mode");
+ return;
+ }
+ }
+
+ rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
+
+ p_atomic_inc(&rscreen->dirty_fb_counter);
+ p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
+}
+
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
continue;
}
- if (tex->dcc_offset) {
+ if (tex->dcc_offset && tex->surface.level[0].dcc_enabled) {
uint32_t reset_value;
bool clear_words_needed;
if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
continue;
- vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
- rctx->clear_buffer(&rctx->b, &tex->resource.b.b,
- tex->dcc_offset, tex->surface.dcc_size,
- reset_value, R600_COHERENCY_CB_META);
+ /* We can change the micro tile mode before a full clear. */
+ if (rctx->screen->chip_class >= SI)
+ si_set_optimal_micro_tile_mode(rctx->screen, tex);
+
+ vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
+ vi_dcc_clear_level(rctx, tex, 0, reset_value);
if (clear_words_needed)
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
continue;
}
+ /* We can change the micro tile mode before a full clear. */
+ if (rctx->screen->chip_class >= SI)
+ si_set_optimal_micro_tile_mode(rctx->screen, tex);
+
/* Do the fast clear. */
rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
tex->cmask.offset, tex->cmask.size, 0,