X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeon%2Fr600_texture.c;h=4d72b86f4e47fcad460d856771652d1b5a75c19a;hb=613410c8fc1b9d6953c688b22283d940bcaec985;hp=5ed94c27264483853093248d5c1d958d87f765dd;hpb=49e3c74cdd0da5abd6cad1fb14af6cc0d85d76c9;p=mesa.git diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 5ed94c27264..4d72b86f4e4 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -30,14 +30,16 @@ #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "util/u_surface.h" #include "os/os_time.h" #include #include static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, struct r600_texture *rtex); -static unsigned r600_choose_tiling(struct r600_common_screen *rscreen, - const struct pipe_resource *templ); +static enum radeon_surf_mode +r600_choose_tiling(struct r600_common_screen *rscreen, + const struct pipe_resource *templ); bool r600_prepare_for_dma_blit(struct r600_common_context *rctx, @@ -51,8 +53,7 @@ bool r600_prepare_for_dma_blit(struct r600_common_context *rctx, if (!rctx->dma.cs) return false; - if (util_format_get_blocksizebits(rdst->resource.b.b.format) != - util_format_get_blocksizebits(rsrc->resource.b.b.format)) + if (rdst->surface.bpe != rsrc->surface.bpe) return false; /* MSAA: Blits don't exist in the real world. */ @@ -71,8 +72,8 @@ bool r600_prepare_for_dma_blit(struct r600_common_context *rctx, * src: Use the 3D path. DCC decompression is expensive. * dst: Use the 3D path to compress the pixels with DCC. */ - if ((rsrc->dcc_offset && rsrc->surface.level[src_level].dcc_enabled) || - (rdst->dcc_offset && rdst->surface.level[dst_level].dcc_enabled)) + if (vi_dcc_enabled(rsrc, src_level) || + vi_dcc_enabled(rdst, dst_level)) return false; /* CMASK as: @@ -176,178 +177,186 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600 src, 0, &sbox); } -static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level, - const struct pipe_box *box) +static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen, + struct r600_texture *rtex, unsigned level, + const struct pipe_box *box, + unsigned *stride, + unsigned *layer_stride) { - enum pipe_format format = rtex->resource.b.b.format; - - return rtex->surface.level[level].offset + - box->z * rtex->surface.level[level].slice_size + - box->y / util_format_get_blockheight(format) * rtex->surface.level[level].pitch_bytes + - box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); + if (rscreen->chip_class >= GFX9) { + *stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe; + *layer_stride = rtex->surface.u.gfx9.surf_slice_size; + + if (!box) + return 0; + + /* Each texture is an array of slices. Each slice is an array + * of mipmap levels. */ + return box->z * rtex->surface.u.gfx9.surf_slice_size + + rtex->surface.u.gfx9.offset[level] + + (box->y / rtex->surface.blk_h * + rtex->surface.u.gfx9.surf_pitch + + box->x / rtex->surface.blk_w) * rtex->surface.bpe; + } else { + *stride = rtex->surface.u.legacy.level[level].nblk_x * + rtex->surface.bpe; + *layer_stride = rtex->surface.u.legacy.level[level].slice_size; + + if (!box) + return rtex->surface.u.legacy.level[level].offset; + + /* Each texture is an array of mipmap levels. Each level is + * an array of slices. */ + return rtex->surface.u.legacy.level[level].offset + + box->z * rtex->surface.u.legacy.level[level].slice_size + + (box->y / rtex->surface.blk_h * + rtex->surface.u.legacy.level[level].nblk_x + + box->x / rtex->surface.blk_w) * rtex->surface.bpe; + } } static int r600_init_surface(struct r600_common_screen *rscreen, struct radeon_surf *surface, const struct pipe_resource *ptex, - unsigned array_mode, - bool is_flushed_depth) + enum radeon_surf_mode array_mode, + unsigned pitch_in_bytes_override, + unsigned offset, + bool is_imported, + bool is_scanout, + bool is_flushed_depth, + bool tc_compatible_htile) { const struct util_format_description *desc = util_format_description(ptex->format); bool is_depth, is_stencil; + int r; + unsigned i, bpe, flags = 0; is_depth = util_format_has_depth(desc); is_stencil = util_format_has_stencil(desc); - surface->npix_x = ptex->width0; - surface->npix_y = ptex->height0; - surface->npix_z = ptex->depth0; - surface->blk_w = util_format_get_blockwidth(ptex->format); - surface->blk_h = util_format_get_blockheight(ptex->format); - surface->blk_d = 1; - surface->array_size = 1; - surface->last_level = ptex->last_level; - if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth && ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { - surface->bpe = 4; /* stencil is allocated separately on evergreen */ + bpe = 4; /* stencil is allocated separately on evergreen */ } else { - surface->bpe = util_format_get_blocksize(ptex->format); + bpe = util_format_get_blocksize(ptex->format); /* align byte per element on dword */ - if (surface->bpe == 3) { - surface->bpe = 4; + if (bpe == 3) { + bpe = 4; } } - surface->nsamples = ptex->nr_samples ? ptex->nr_samples : 1; - surface->flags = RADEON_SURF_SET(array_mode, MODE); - - switch (ptex->target) { - case PIPE_TEXTURE_1D: - surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE); - break; - case PIPE_TEXTURE_RECT: - case PIPE_TEXTURE_2D: - surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE); - break; - case PIPE_TEXTURE_3D: - surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE); - break; - case PIPE_TEXTURE_1D_ARRAY: - surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE); - surface->array_size = ptex->array_size; - break; - case PIPE_TEXTURE_CUBE_ARRAY: /* cube array layout like 2d array */ - assert(ptex->array_size % 6 == 0); - case PIPE_TEXTURE_2D_ARRAY: - surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE); - surface->array_size = ptex->array_size; - break; - case PIPE_TEXTURE_CUBE: - surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_CUBEMAP, TYPE); - break; - case PIPE_BUFFER: - default: - return -EINVAL; - } - if (!is_flushed_depth && is_depth) { - surface->flags |= RADEON_SURF_ZBUFFER; + flags |= RADEON_SURF_ZBUFFER; + + if (tc_compatible_htile && + (rscreen->chip_class >= GFX9 || + array_mode == RADEON_SURF_MODE_2D)) { + /* TC-compatible HTILE only supports Z32_FLOAT. + * GFX9 also supports Z16_UNORM. + * On VI, promote Z16 to Z32. DB->CB copies will convert + * the format for transfers. + */ + if (rscreen->chip_class == VI) + bpe = 4; - if (is_stencil) { - surface->flags |= RADEON_SURF_SBUFFER | - RADEON_SURF_HAS_SBUFFER_MIPTREE; + flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; } - } - if (rscreen->chip_class >= SI) { - surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX; + + if (is_stencil) + flags |= RADEON_SURF_SBUFFER; } if (rscreen->chip_class >= VI && (ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC || ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT)) - surface->flags |= RADEON_SURF_DISABLE_DCC; + flags |= RADEON_SURF_DISABLE_DCC; - if (ptex->bind & PIPE_BIND_SCANOUT) { + if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) { /* This should catch bugs in gallium users setting incorrect flags. */ - assert(surface->nsamples == 1 && - surface->array_size == 1 && - surface->npix_z == 1 && - surface->last_level == 0 && - !(surface->flags & RADEON_SURF_Z_OR_SBUFFER)); + assert(ptex->nr_samples <= 1 && + ptex->array_size == 1 && + ptex->depth0 == 1 && + ptex->last_level == 0 && + !(flags & RADEON_SURF_Z_OR_SBUFFER)); - surface->flags |= RADEON_SURF_SCANOUT; + flags |= RADEON_SURF_SCANOUT; } - return 0; -} -static int r600_setup_surface(struct pipe_screen *screen, - struct r600_texture *rtex, - unsigned pitch_in_bytes_override, - unsigned offset) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - unsigned i; - int r; + if (is_imported) + flags |= RADEON_SURF_IMPORTED; + if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING)) + flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE; - r = rscreen->ws->surface_init(rscreen->ws, &rtex->surface); + r = rscreen->ws->surface_init(rscreen->ws, ptex, flags, bpe, + array_mode, surface); if (r) { return r; } - rtex->size = rtex->surface.bo_size; - - if (pitch_in_bytes_override && pitch_in_bytes_override != rtex->surface.level[0].pitch_bytes) { - /* old ddx on evergreen over estimate alignment for 1d, only 1 level - * for those - */ - rtex->surface.level[0].nblk_x = pitch_in_bytes_override / rtex->surface.bpe; - rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override; - rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y; - } + if (rscreen->chip_class >= GFX9) { + assert(!pitch_in_bytes_override || + pitch_in_bytes_override == surface->u.gfx9.surf_pitch * bpe); + surface->u.gfx9.surf_offset = offset; + } else { + if (pitch_in_bytes_override && + pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) { + /* old ddx on evergreen over estimate alignment for 1d, only 1 level + * for those + */ + surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / bpe; + surface->u.legacy.level[0].slice_size = pitch_in_bytes_override * + surface->u.legacy.level[0].nblk_y; + } - if (offset) { - for (i = 0; i < ARRAY_SIZE(rtex->surface.level); ++i) - rtex->surface.level[i].offset += offset; + if (offset) { + for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i) + surface->u.legacy.level[i].offset += offset; + } } return 0; } -static void r600_texture_init_metadata(struct r600_texture *rtex, +static void r600_texture_init_metadata(struct r600_common_screen *rscreen, + struct r600_texture *rtex, struct radeon_bo_metadata *metadata) { struct radeon_surf *surface = &rtex->surface; memset(metadata, 0, sizeof(*metadata)); - metadata->microtile = surface->level[0].mode >= RADEON_SURF_MODE_1D ? - RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; - metadata->macrotile = surface->level[0].mode >= RADEON_SURF_MODE_2D ? - RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; - metadata->pipe_config = surface->pipe_config; - metadata->bankw = surface->bankw; - metadata->bankh = surface->bankh; - metadata->tile_split = surface->tile_split; - metadata->mtilea = surface->mtilea; - metadata->num_banks = surface->num_banks; - metadata->stride = surface->level[0].pitch_bytes; - metadata->scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; -} -static void r600_dirty_all_framebuffer_states(struct r600_common_screen *rscreen) -{ - p_atomic_inc(&rscreen->dirty_fb_counter); + if (rscreen->chip_class >= GFX9) { + metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode; + } else { + metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? + RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; + metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? + RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; + metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config; + metadata->u.legacy.bankw = surface->u.legacy.bankw; + metadata->u.legacy.bankh = surface->u.legacy.bankh; + metadata->u.legacy.tile_split = surface->u.legacy.tile_split; + metadata->u.legacy.mtilea = surface->u.legacy.mtilea; + metadata->u.legacy.num_banks = surface->u.legacy.num_banks; + metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe; + metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; + } } -static void r600_eliminate_fast_color_clear(struct r600_common_screen *rscreen, - struct r600_texture *rtex) +static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx, + struct r600_texture *rtex) { - struct pipe_context *ctx = rscreen->aux_context; + struct r600_common_screen *rscreen = rctx->screen; + struct pipe_context *ctx = &rctx->b; + + if (ctx == rscreen->aux_context) + mtx_lock(&rscreen->aux_context_lock); - pipe_mutex_lock(rscreen->aux_context_lock); ctx->flush_resource(ctx, &rtex->resource.b.b); ctx->flush(ctx, NULL, 0); - pipe_mutex_unlock(rscreen->aux_context_lock); + + if (ctx == rscreen->aux_context) + mtx_unlock(&rscreen->aux_context_lock); } static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, @@ -361,6 +370,7 @@ static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, /* Disable CMASK. */ memset(&rtex->cmask, 0, sizeof(rtex->cmask)); rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8; + rtex->dirty_level_mask = 0; if (rscreen->chip_class >= SI) rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1); @@ -371,7 +381,7 @@ static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, r600_resource_reference(&rtex->cmask_buffer, NULL); /* Notify all contexts about the change. */ - r600_dirty_all_framebuffer_states(rscreen); + p_atomic_inc(&rscreen->dirty_tex_counter); p_atomic_inc(&rscreen->compressed_colortex_counter); } @@ -379,7 +389,7 @@ static bool r600_can_disable_dcc(struct r600_texture *rtex) { /* We can't disable DCC if it can be written by another process. */ return rtex->dcc_offset && - (!rtex->resource.is_shared || + (!rtex->resource.b.is_shared || !(rtex->resource.external_usage & PIPE_HANDLE_USAGE_WRITE)); } @@ -395,24 +405,48 @@ static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen, rtex->dcc_offset = 0; /* Notify all contexts about the change. */ - r600_dirty_all_framebuffer_states(rscreen); + p_atomic_inc(&rscreen->dirty_tex_counter); return true; } -bool r600_texture_disable_dcc(struct r600_common_screen *rscreen, +/** + * Disable DCC for the texture. (first decompress, then discard metadata). + * + * There is unresolved multi-context synchronization issue between + * screen::aux_context and the current context. If applications do this with + * multiple contexts, it's already undefined behavior for them and we don't + * have to worry about that. The scenario is: + * + * If context 1 disables DCC and context 2 has queued commands that write + * to the texture via CB with DCC enabled, and the order of operations is + * as follows: + * context 2 queues draw calls rendering to the texture, but doesn't flush + * context 1 disables DCC and flushes + * context 1 & 2 reset descriptors and FB state + * context 2 flushes (new compressed tiles written by the draw calls) + * context 1 & 2 read garbage, because DCC is disabled, yet there are + * compressed tiled + * + * \param rctx the current context if you have one, or rscreen->aux_context + * if you don't. + */ +bool r600_texture_disable_dcc(struct r600_common_context *rctx, struct r600_texture *rtex) { - struct r600_common_context *rctx = - (struct r600_common_context *)rscreen->aux_context; + struct r600_common_screen *rscreen = rctx->screen; if (!r600_can_disable_dcc(rtex)) return false; + if (&rctx->b == rscreen->aux_context) + mtx_lock(&rscreen->aux_context_lock); + /* Decompress DCC. */ - pipe_mutex_lock(rscreen->aux_context_lock); rctx->decompress_dcc(&rctx->b, rtex); rctx->b.flush(&rctx->b, NULL, 0); - pipe_mutex_unlock(rscreen->aux_context_lock); + + if (&rctx->b == rscreen->aux_context) + mtx_unlock(&rscreen->aux_context_lock); return r600_texture_discard_dcc(rscreen, rtex); } @@ -432,8 +466,8 @@ static void r600_degrade_tile_mode_to_linear(struct r600_common_context *rctx, if (rctx->chip_class < SI) return; - if (rtex->resource.is_shared || - rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED) + if (rtex->resource.b.is_shared || + rtex->surface.is_linear) return; /* This fails with MSAA, depth, and compressed textures. */ @@ -466,7 +500,12 @@ static void r600_degrade_tile_mode_to_linear(struct r600_common_context *rctx, rtex->resource.b.b.bind = templ.bind; pb_reference(&rtex->resource.buf, new_tex->resource.buf); rtex->resource.gpu_address = new_tex->resource.gpu_address; + rtex->resource.vram_usage = new_tex->resource.vram_usage; + rtex->resource.gart_usage = new_tex->resource.gart_usage; + rtex->resource.bo_size = new_tex->resource.bo_size; + rtex->resource.bo_alignment = new_tex->resource.bo_alignment; rtex->resource.domains = new_tex->resource.domains; + rtex->resource.flags = new_tex->resource.flags; rtex->size = new_tex->size; rtex->surface = new_tex->surface; rtex->non_disp_tiling = new_tex->non_disp_tiling; @@ -479,22 +518,27 @@ static void r600_degrade_tile_mode_to_linear(struct r600_common_context *rctx, assert(!rtex->dcc_offset); assert(!rtex->is_depth); - pipe_resource_reference((struct pipe_resource**)&new_tex, NULL); + r600_texture_reference(&new_tex, NULL); - r600_dirty_all_framebuffer_states(rctx->screen); - p_atomic_inc(&rctx->screen->dirty_tex_descriptor_counter); + p_atomic_inc(&rctx->screen->dirty_tex_counter); } static boolean r600_texture_get_handle(struct pipe_screen* screen, + struct pipe_context *ctx, struct pipe_resource *resource, struct winsys_handle *whandle, unsigned usage) { struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct r600_common_context *rctx; struct r600_resource *res = (struct r600_resource*)resource; struct r600_texture *rtex = (struct r600_texture*)resource; struct radeon_bo_metadata metadata; bool update_metadata = false; + unsigned stride, offset, slice_size; + + ctx = threaded_context_unwrap_sync(ctx); + rctx = (struct r600_common_context*)(ctx ? ctx : rscreen->aux_context); /* This is not supported now, but it might be required for OpenCL * interop in the future. @@ -509,24 +553,25 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, * access. */ if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) { - if (r600_texture_disable_dcc(rscreen, rtex)) + if (r600_texture_disable_dcc(rctx, rtex)) update_metadata = true; } if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && - rtex->cmask.size) { + (rtex->cmask.size || rtex->dcc_offset)) { /* Eliminate fast clear (both CMASK and DCC) */ - r600_eliminate_fast_color_clear(rscreen, rtex); + r600_eliminate_fast_color_clear(rctx, rtex); /* Disable CMASK if flush_resource isn't going * to be called. */ - r600_texture_discard_cmask(rscreen, rtex); + if (rtex->cmask.size) + r600_texture_discard_cmask(rscreen, rtex); } /* Set metadata. */ - if (!res->is_shared || update_metadata) { - r600_texture_init_metadata(rtex, &metadata); + if (!res->b.is_shared || update_metadata) { + r600_texture_init_metadata(rscreen, rtex, &metadata); if (rscreen->query_opaque_metadata) rscreen->query_opaque_metadata(rscreen, rtex, &metadata); @@ -535,7 +580,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, } } - if (res->is_shared) { + if (res->b.is_shared) { /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user * doesn't set it. */ @@ -543,15 +588,23 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH; } else { - res->is_shared = true; + res->b.is_shared = true; res->external_usage = usage; } - return rscreen->ws->buffer_get_handle(res->buf, - rtex->surface.level[0].pitch_bytes, - rtex->surface.level[0].offset, - rtex->surface.level[0].slice_size, - whandle); + if (rscreen->chip_class >= GFX9) { + offset = rtex->surface.u.gfx9.surf_offset; + stride = rtex->surface.u.gfx9.surf_pitch * + rtex->surface.bpe; + slice_size = rtex->surface.u.gfx9.surf_slice_size; + } else { + offset = rtex->surface.u.legacy.level[0].offset; + stride = rtex->surface.u.legacy.level[0].nblk_x * + rtex->surface.bpe; + slice_size = rtex->surface.u.legacy.level[0].slice_size; + } + return rscreen->ws->buffer_get_handle(res->buf, stride, offset, + slice_size, whandle); } static void r600_texture_destroy(struct pipe_screen *screen, @@ -560,8 +613,7 @@ static void r600_texture_destroy(struct pipe_screen *screen, struct r600_texture *rtex = (struct r600_texture*)ptex; struct r600_resource *resource = &rtex->resource; - if (rtex->flushed_depth_texture) - pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); + r600_texture_reference(&rtex->flushed_depth_texture, NULL); r600_resource_reference(&rtex->htile_buffer, NULL); if (rtex->cmask_buffer != &rtex->resource) { @@ -582,35 +634,39 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen, struct r600_fmask_info *out) { /* FMASK is allocated like an ordinary texture. */ - struct radeon_surf fmask = rtex->surface; + struct pipe_resource templ = rtex->resource.b.b; + struct radeon_surf fmask = {}; + unsigned flags, bpe; memset(out, 0, sizeof(*out)); - fmask.bo_alignment = 0; - fmask.bo_size = 0; - fmask.nsamples = 1; - fmask.flags |= RADEON_SURF_FMASK; + if (rscreen->chip_class >= GFX9) { + out->alignment = rtex->surface.u.gfx9.fmask_alignment; + out->size = rtex->surface.u.gfx9.fmask_size; + return; + } - /* Force 2D tiling if it wasn't set. This may occur when creating - * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample - * destination buffer must have an FMASK too. */ - fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE); - fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE); + templ.nr_samples = 1; + flags = rtex->surface.flags | RADEON_SURF_FMASK; - if (rscreen->chip_class >= SI) { - fmask.flags |= RADEON_SURF_HAS_TILE_MODE_INDEX; + if (rscreen->chip_class <= CAYMAN) { + /* Use the same parameters and tile mode. */ + fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw; + fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh; + fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea; + fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split; + + if (nr_samples <= 4) + fmask.u.legacy.bankh = 4; } switch (nr_samples) { case 2: case 4: - fmask.bpe = 1; - if (rscreen->chip_class <= CAYMAN) { - fmask.bankh = 4; - } + bpe = 1; break; case 8: - fmask.bpe = 4; + bpe = 4; break; default: R600_ERR("Invalid sample count for FMASK allocation.\n"); @@ -621,25 +677,26 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen, * This can be fixed by writing a separate FMASK allocator specifically * for R600-R700 asics. */ if (rscreen->chip_class <= R700) { - fmask.bpe *= 2; + bpe *= 2; } - if (rscreen->ws->surface_init(rscreen->ws, &fmask)) { + if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe, + RADEON_SURF_MODE_2D, &fmask)) { R600_ERR("Got error in surface_init while allocating FMASK.\n"); return; } - assert(fmask.level[0].mode == RADEON_SURF_MODE_2D); + assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D); - out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64; + out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64; if (out->slice_tile_max) out->slice_tile_max -= 1; - out->tile_mode_index = fmask.tiling_index[0]; - out->pitch_in_pixels = fmask.level[0].nblk_x; - out->bank_height = fmask.bankh; - out->alignment = MAX2(256, fmask.bo_alignment); - out->size = fmask.bo_size; + out->tile_mode_index = fmask.u.legacy.tiling_index[0]; + out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x; + out->bank_height = fmask.u.legacy.bankh; + out->alignment = MAX2(256, fmask.surf_alignment); + out->size = fmask.surf_size; } static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen, @@ -670,8 +727,8 @@ void r600_texture_get_cmask_info(struct r600_common_screen *rscreen, unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile); unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width; - unsigned pitch_elements = align(rtex->surface.npix_x, macro_tile_width); - unsigned height = align(rtex->surface.npix_y, macro_tile_height); + unsigned pitch_elements = align(rtex->resource.b.b.width0, macro_tile_width); + unsigned height = align(rtex->resource.b.b.height0, macro_tile_height); unsigned base_align = num_pipes * pipe_interleave_bytes; unsigned slice_bytes = @@ -680,10 +737,6 @@ void r600_texture_get_cmask_info(struct r600_common_screen *rscreen, assert(macro_tile_width % 128 == 0); assert(macro_tile_height % 128 == 0); - out->pitch = pitch_elements; - out->height = height; - out->xalign = macro_tile_width; - out->yalign = macro_tile_height; out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1; out->alignment = MAX2(256, base_align); out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) * @@ -698,6 +751,12 @@ static void si_texture_get_cmask_info(struct r600_common_screen *rscreen, unsigned num_pipes = rscreen->info.num_tile_pipes; unsigned cl_width, cl_height; + if (rscreen->chip_class >= GFX9) { + out->alignment = rtex->surface.u.gfx9.cmask_alignment; + out->size = rtex->surface.u.gfx9.cmask_size; + return; + } + switch (num_pipes) { case 2: cl_width = 32; @@ -722,17 +781,13 @@ static void si_texture_get_cmask_info(struct r600_common_screen *rscreen, unsigned base_align = num_pipes * pipe_interleave_bytes; - unsigned width = align(rtex->surface.npix_x, cl_width*8); - unsigned height = align(rtex->surface.npix_y, cl_height*8); + unsigned width = align(rtex->resource.b.b.width0, cl_width*8); + unsigned height = align(rtex->resource.b.b.height0, cl_height*8); unsigned slice_elements = (width * height) / (8*8); /* Each element of CMASK is a nibble. */ unsigned slice_bytes = slice_elements / 2; - out->pitch = width; - out->height = height; - out->xalign = cl_width * 8; - out->yalign = cl_height * 8; out->slice_tile_max = (width * height) / (128*128); if (out->slice_tile_max) out->slice_tile_max -= 1; @@ -775,8 +830,11 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen } rtex->cmask_buffer = (struct r600_resource *) - pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, - PIPE_USAGE_DEFAULT, rtex->cmask.size); + r600_aligned_buffer_create(&rscreen->b, + R600_RESOURCE_FLAG_UNMAPPABLE, + PIPE_USAGE_DEFAULT, + rtex->cmask.size, + rtex->cmask.alignment); if (rtex->cmask_buffer == NULL) { rtex->cmask.size = 0; return; @@ -793,28 +851,32 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen p_atomic_inc(&rscreen->compressed_colortex_counter); } -static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, - struct r600_texture *rtex) +static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, + struct r600_texture *rtex) { unsigned cl_width, cl_height, width, height; unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; unsigned num_pipes = rscreen->info.num_tile_pipes; + assert(rscreen->chip_class <= VI); + + rtex->surface.htile_size = 0; + if (rscreen->chip_class <= EVERGREEN && rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26) - return 0; + return; /* HW bug on R6xx. */ if (rscreen->chip_class == R600 && - (rtex->surface.level[0].npix_x > 7680 || - rtex->surface.level[0].npix_y > 7680)) - return 0; + (rtex->resource.b.b.width0 > 7680 || + rtex->resource.b.b.height0 > 7680)) + return; /* HTILE is broken with 1D tiling on old kernels and CIK. */ if (rscreen->chip_class >= CIK && - rtex->surface.level[0].mode == RADEON_SURF_MODE_1D && + rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D && rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38) - return 0; + return; /* Overalign HTILE on P2 configs to work around GPU hangs in * piglit/depthstencil-render-miplevels 585. @@ -849,11 +911,11 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, break; default: assert(0); - return 0; + return; } - width = align(rtex->surface.npix_x, cl_width * 8); - height = align(rtex->surface.npix_y, cl_height * 8); + width = align(rtex->resource.b.b.width0, cl_width * 8); + height = align(rtex->resource.b.b.height0, cl_height * 8); slice_elements = (width * height) / (8 * 8); slice_bytes = slice_elements * 4; @@ -861,55 +923,122 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; base_align = num_pipes * pipe_interleave_bytes; - rtex->htile.pitch = width; - rtex->htile.height = height; - rtex->htile.xalign = cl_width * 8; - rtex->htile.yalign = cl_height * 8; - - return (util_max_layer(&rtex->resource.b.b, 0) + 1) * + rtex->surface.htile_alignment = base_align; + rtex->surface.htile_size = + (util_max_layer(&rtex->resource.b.b, 0) + 1) * align(slice_bytes, base_align); } static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, struct r600_texture *rtex) { - unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex); + uint32_t clear_value; + + if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile) { + clear_value = 0x0000030F; + } else { + r600_texture_get_htile_size(rscreen, rtex); + clear_value = 0; + } - if (!htile_size) + if (!rtex->surface.htile_size) return; rtex->htile_buffer = (struct r600_resource*) - pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, - PIPE_USAGE_DEFAULT, htile_size); + r600_aligned_buffer_create(&rscreen->b, + R600_RESOURCE_FLAG_UNMAPPABLE, + PIPE_USAGE_DEFAULT, + rtex->surface.htile_size, + rtex->surface.htile_alignment); if (rtex->htile_buffer == NULL) { /* this is not a fatal error as we can still keep rendering * without htile buffer */ R600_ERR("Failed to create buffer object for htile buffer.\n"); } else { - r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, - htile_size, 0, R600_COHERENCY_NONE); + r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, + 0, rtex->surface.htile_size, + clear_value); } } -void r600_print_texture_info(struct r600_texture *rtex, FILE *f) +void r600_print_texture_info(struct r600_common_screen *rscreen, + struct r600_texture *rtex, FILE *f) { int i; + /* Common parameters. */ fprintf(f, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, " - "blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, " + "blk_h=%u, array_size=%u, last_level=%u, " "bpe=%u, nsamples=%u, flags=0x%x, %s\n", - rtex->surface.npix_x, rtex->surface.npix_y, - rtex->surface.npix_z, rtex->surface.blk_w, - rtex->surface.blk_h, rtex->surface.blk_d, - rtex->surface.array_size, rtex->surface.last_level, - rtex->surface.bpe, rtex->surface.nsamples, + rtex->resource.b.b.width0, rtex->resource.b.b.height0, + rtex->resource.b.b.depth0, rtex->surface.blk_w, + rtex->surface.blk_h, + rtex->resource.b.b.array_size, rtex->resource.b.b.last_level, + rtex->surface.bpe, rtex->resource.b.b.nr_samples, rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format)); - fprintf(f, " Layout: size=%"PRIu64", alignment=%"PRIu64", bankw=%u, " + if (rscreen->chip_class >= GFX9) { + fprintf(f, " Surf: size=%"PRIu64", slice_size=%"PRIu64", " + "alignment=%u, swmode=%u, epitch=%u, pitch=%u\n", + rtex->surface.surf_size, + rtex->surface.u.gfx9.surf_slice_size, + rtex->surface.surf_alignment, + rtex->surface.u.gfx9.surf.swizzle_mode, + rtex->surface.u.gfx9.surf.epitch, + rtex->surface.u.gfx9.surf_pitch); + + if (rtex->fmask.size) { + fprintf(f, " FMASK: offset=%"PRIu64", size=%"PRIu64", " + "alignment=%u, swmode=%u, epitch=%u\n", + rtex->fmask.offset, + rtex->surface.u.gfx9.fmask_size, + rtex->surface.u.gfx9.fmask_alignment, + rtex->surface.u.gfx9.fmask.swizzle_mode, + rtex->surface.u.gfx9.fmask.epitch); + } + + if (rtex->cmask.size) { + fprintf(f, " CMask: offset=%"PRIu64", size=%"PRIu64", " + "alignment=%u, rb_aligned=%u, pipe_aligned=%u\n", + rtex->cmask.offset, + rtex->surface.u.gfx9.cmask_size, + rtex->surface.u.gfx9.cmask_alignment, + rtex->surface.u.gfx9.cmask.rb_aligned, + rtex->surface.u.gfx9.cmask.pipe_aligned); + } + + if (rtex->htile_buffer) { + fprintf(f, " HTile: size=%u, alignment=%u, " + "rb_aligned=%u, pipe_aligned=%u\n", + rtex->htile_buffer->b.b.width0, + rtex->htile_buffer->buf->alignment, + rtex->surface.u.gfx9.htile.rb_aligned, + rtex->surface.u.gfx9.htile.pipe_aligned); + } + + if (rtex->dcc_offset) { + fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", " + "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n", + rtex->dcc_offset, rtex->surface.dcc_size, + rtex->surface.dcc_alignment, + rtex->surface.u.gfx9.dcc_pitch_max, + rtex->surface.num_dcc_levels); + } + + if (rtex->surface.u.gfx9.stencil_offset) { + fprintf(f, " Stencil: offset=%"PRIu64", swmode=%u, epitch=%u\n", + rtex->surface.u.gfx9.stencil_offset, + rtex->surface.u.gfx9.stencil.swizzle_mode, + rtex->surface.u.gfx9.stencil.epitch); + } + return; + } + + fprintf(f, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, " "bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n", - rtex->surface.bo_size, rtex->surface.bo_alignment, rtex->surface.bankw, - rtex->surface.bankh, rtex->surface.num_banks, rtex->surface.mtilea, - rtex->surface.tile_split, rtex->surface.pipe_config, + rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw, + rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea, + rtex->surface.u.legacy.tile_split, rtex->surface.u.legacy.pipe_config, (rtex->surface.flags & RADEON_SURF_SCANOUT) != 0); if (rtex->fmask.size) @@ -920,64 +1049,60 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f) rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index); if (rtex->cmask.size) - fprintf(f, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch=%u, " - "height=%u, xalign=%u, yalign=%u, slice_tile_max=%u\n", + fprintf(f, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, " + "slice_tile_max=%u\n", rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment, - rtex->cmask.pitch, rtex->cmask.height, rtex->cmask.xalign, - rtex->cmask.yalign, rtex->cmask.slice_tile_max); + rtex->cmask.slice_tile_max); if (rtex->htile_buffer) - fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, " - "xalign=%u, yalign=%u\n", + fprintf(f, " HTile: size=%u, alignment=%u, TC_compatible = %u\n", rtex->htile_buffer->b.b.width0, - rtex->htile_buffer->buf->alignment, rtex->htile.pitch, - rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign); + rtex->htile_buffer->buf->alignment, + rtex->tc_compatible_htile); if (rtex->dcc_offset) { - fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n", + fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%u\n", rtex->dcc_offset, rtex->surface.dcc_size, rtex->surface.dcc_alignment); - for (i = 0; i <= rtex->surface.last_level; i++) + for (i = 0; i <= rtex->resource.b.b.last_level; i++) fprintf(f, " DCCLevel[%i]: enabled=%u, offset=%"PRIu64", " "fast_clear_size=%"PRIu64"\n", - i, rtex->surface.level[i].dcc_enabled, - rtex->surface.level[i].dcc_offset, - rtex->surface.level[i].dcc_fast_clear_size); + i, i < rtex->surface.num_dcc_levels, + rtex->surface.u.legacy.level[i].dcc_offset, + rtex->surface.u.legacy.level[i].dcc_fast_clear_size); } - for (i = 0; i <= rtex->surface.last_level; i++) + for (i = 0; i <= rtex->resource.b.b.last_level; i++) fprintf(f, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", " "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, " - "nblk_z=%u, pitch_bytes=%u, mode=%u\n", - i, rtex->surface.level[i].offset, - rtex->surface.level[i].slice_size, + "mode=%u, tiling_index = %u\n", + i, rtex->surface.u.legacy.level[i].offset, + rtex->surface.u.legacy.level[i].slice_size, u_minify(rtex->resource.b.b.width0, i), u_minify(rtex->resource.b.b.height0, i), u_minify(rtex->resource.b.b.depth0, i), - rtex->surface.level[i].nblk_x, - rtex->surface.level[i].nblk_y, - rtex->surface.level[i].nblk_z, - rtex->surface.level[i].pitch_bytes, - rtex->surface.level[i].mode); + rtex->surface.u.legacy.level[i].nblk_x, + rtex->surface.u.legacy.level[i].nblk_y, + rtex->surface.u.legacy.level[i].mode, + rtex->surface.u.legacy.tiling_index[i]); if (rtex->surface.flags & RADEON_SURF_SBUFFER) { - for (i = 0; i <= rtex->surface.last_level; i++) { - fprintf(f, " StencilLayout: tilesplit=%u\n", - rtex->surface.stencil_tile_split); + fprintf(f, " StencilLayout: tilesplit=%u\n", + rtex->surface.u.legacy.stencil_tile_split); + for (i = 0; i <= rtex->resource.b.b.last_level; i++) { fprintf(f, " StencilLevel[%i]: offset=%"PRIu64", " "slice_size=%"PRIu64", npix_x=%u, " "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, " - "nblk_z=%u, pitch_bytes=%u, mode=%u\n", - i, rtex->surface.stencil_level[i].offset, - rtex->surface.stencil_level[i].slice_size, + "mode=%u, tiling_index = %u\n", + i, rtex->surface.u.legacy.stencil_level[i].offset, + rtex->surface.u.legacy.stencil_level[i].slice_size, u_minify(rtex->resource.b.b.width0, i), u_minify(rtex->resource.b.b.height0, i), u_minify(rtex->resource.b.b.depth0, i), - rtex->surface.stencil_level[i].nblk_x, - rtex->surface.stencil_level[i].nblk_y, - rtex->surface.stencil_level[i].nblk_z, - rtex->surface.stencil_level[i].pitch_bytes, - rtex->surface.stencil_level[i].mode); + rtex->surface.u.legacy.stencil_level[i].nblk_x, + rtex->surface.u.legacy.stencil_level[i].nblk_y, + rtex->surface.u.legacy.stencil_level[i].mode, + rtex->surface.u.legacy.stencil_tiling_index[i]); } } } @@ -986,8 +1111,6 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f) static struct r600_texture * r600_texture_create_object(struct pipe_screen *screen, const struct pipe_resource *base, - unsigned pitch_in_bytes_override, - unsigned offset, struct pb_buffer *buf, struct radeon_surf *surface) { @@ -1001,6 +1124,7 @@ r600_texture_create_object(struct pipe_screen *screen, resource = &rtex->resource; resource->b.b = *base; + resource->b.b.next = NULL; resource->b.vtbl = &r600_texture_vtbl; pipe_reference_init(&resource->b.b.reference, 1); resource->b.b.screen = screen; @@ -1009,31 +1133,62 @@ r600_texture_create_object(struct pipe_screen *screen, rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format)); rtex->surface = *surface; - if (r600_setup_surface(screen, rtex, pitch_in_bytes_override, offset)) { - FREE(rtex); - return NULL; + rtex->size = rtex->surface.surf_size; + + rtex->tc_compatible_htile = rtex->surface.htile_size != 0 && + (rtex->surface.flags & + RADEON_SURF_TC_COMPATIBLE_HTILE); + + /* TC-compatible HTILE: + * - VI only supports Z32_FLOAT. + * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */ + if (rtex->tc_compatible_htile) { + if (rscreen->chip_class >= GFX9 && + base->format == PIPE_FORMAT_Z16_UNORM) + rtex->db_render_format = base->format; + else + rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT; + } else { + rtex->db_render_format = base->format; } /* Tiled depth textures utilize the non-displayable tile order. * This must be done after r600_setup_surface. * Applies to R600-Cayman. */ - rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D; + rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D; /* Applies to GCN. */ rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode; - /* We want to optimistically enable separate DCC on the first clear - * for apps that don't call SwapBuffers or call it too late, so set - * a very high number at the beginning. If the PS/draw ratio is too - * low, the first DCC decompression will disable DCC. + /* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers + * between frames, so the only thing that can enable separate DCC + * with DRI2 is multiple slow clears within a frame. */ - rtex->ps_draw_ratio = 100; + rtex->ps_draw_ratio = 0; if (rtex->is_depth) { + if (base->flags & (R600_RESOURCE_FLAG_TRANSFER | + R600_RESOURCE_FLAG_FLUSHED_DEPTH) || + rscreen->chip_class >= EVERGREEN) { + if (rscreen->chip_class >= GFX9) { + rtex->can_sample_z = true; + rtex->can_sample_s = true; + } else { + rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted; + rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted; + } + } else { + if (rtex->resource.b.b.nr_samples <= 1 && + (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM || + rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT)) + rtex->can_sample_z = true; + } + if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER | - R600_RESOURCE_FLAG_FLUSHED_DEPTH)) && - !(rscreen->debug_flags & DBG_NO_HYPERZ)) { + R600_RESOURCE_FLAG_FLUSHED_DEPTH))) { + rtex->db_compatible = true; - r600_texture_allocate_htile(rscreen, rtex); + if (!(rscreen->debug_flags & DBG_NO_HYPERZ)) + r600_texture_allocate_htile(rscreen, rtex); } } else { if (base->nr_samples > 1) { @@ -1063,22 +1218,32 @@ r600_texture_create_object(struct pipe_screen *screen, /* Now create the backing buffer. */ if (!buf) { - if (!r600_init_resource(rscreen, resource, rtex->size, - rtex->surface.bo_alignment)) { + r600_init_resource_fields(rscreen, resource, rtex->size, + rtex->surface.surf_alignment); + + resource->flags |= RADEON_FLAG_HANDLE; + + if (!r600_alloc_resource(rscreen, resource)) { FREE(rtex); return NULL; } } else { resource->buf = buf; resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf); + resource->bo_size = buf->size; + resource->bo_alignment = buf->alignment; resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf); + if (resource->domains & RADEON_DOMAIN_VRAM) + resource->vram_usage = buf->size; + else if (resource->domains & RADEON_DOMAIN_GTT) + resource->gart_usage = buf->size; } if (rtex->cmask.size) { /* Initialize the cmask to 0xCC (= compressed state). */ r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b, rtex->cmask.offset, rtex->cmask.size, - 0xCCCCCCCC, R600_COHERENCY_NONE); + 0xCCCCCCCC); } /* Initialize DCC only if the texture is not being imported. */ @@ -1086,7 +1251,7 @@ r600_texture_create_object(struct pipe_screen *screen, r600_screen_clear_buffer(rscreen, &rtex->resource.b.b, rtex->dcc_offset, rtex->surface.dcc_size, - 0xFFFFFFFF, R600_COHERENCY_NONE); + 0xFFFFFFFF); } /* Initialize the CMASK base register value. */ @@ -1103,14 +1268,16 @@ r600_texture_create_object(struct pipe_screen *screen, if (rscreen->debug_flags & DBG_TEX) { puts("Texture:"); - r600_print_texture_info(rtex, stdout); + r600_print_texture_info(rscreen, rtex, stdout); + fflush(stdout); } return rtex; } -static unsigned r600_choose_tiling(struct r600_common_screen *rscreen, - const struct pipe_resource *templ) +static enum radeon_surf_mode +r600_choose_tiling(struct r600_common_screen *rscreen, + const struct pipe_resource *templ) { const struct util_format_description *desc = util_format_description(templ->format); bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING; @@ -1155,7 +1322,9 @@ static unsigned r600_choose_tiling(struct r600_common_screen *rscreen, /* Textures with a very small height are recommended to be linear. */ if (templ->target == PIPE_TEXTURE_1D || templ->target == PIPE_TEXTURE_1D_ARRAY || - templ->height0 <= 4) + /* Only very thin and long 2D textures should benefit from + * linear_aligned. */ + (templ->width0 > 8 && templ->height0 <= 2)) return RADEON_SURF_MODE_LINEAR_ALIGNED; /* Textures likely to be mapped often. */ @@ -1178,20 +1347,27 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, { struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; struct radeon_surf surface = {0}; + bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH; + bool tc_compatible_htile = + rscreen->chip_class >= VI && + (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) && + !(rscreen->debug_flags & DBG_NO_HYPERZ) && + !is_flushed_depth && + templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */ + util_format_is_depth_or_stencil(templ->format); + int r; r = r600_init_surface(rscreen, &surface, templ, - r600_choose_tiling(rscreen, templ), - templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH); + r600_choose_tiling(rscreen, templ), 0, 0, + false, false, is_flushed_depth, + tc_compatible_htile); if (r) { return NULL; } - r = rscreen->ws->surface_best(rscreen->ws, &surface); - if (r) { - return NULL; - } - return (struct pipe_resource *)r600_texture_create_object(screen, templ, 0, - 0, NULL, &surface); + + return (struct pipe_resource *) + r600_texture_create_object(screen, templ, NULL, &surface); } static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, @@ -1207,6 +1383,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen int r; struct radeon_bo_metadata metadata = {}; struct r600_texture *rtex; + bool is_scanout; /* Support only 2D textures without mipmaps */ if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || @@ -1219,39 +1396,53 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen rscreen->ws->buffer_get_metadata(buf, &metadata); - surface.pipe_config = metadata.pipe_config; - surface.bankw = metadata.bankw; - surface.bankh = metadata.bankh; - surface.tile_split = metadata.tile_split; - surface.mtilea = metadata.mtilea; - surface.num_banks = metadata.num_banks; - - if (metadata.macrotile == RADEON_LAYOUT_TILED) - array_mode = RADEON_SURF_MODE_2D; - else if (metadata.microtile == RADEON_LAYOUT_TILED) - array_mode = RADEON_SURF_MODE_1D; - else - array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; + if (rscreen->chip_class >= GFX9) { + if (metadata.u.gfx9.swizzle_mode > 0) + array_mode = RADEON_SURF_MODE_2D; + else + array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; + + is_scanout = metadata.u.gfx9.swizzle_mode == 0 || + metadata.u.gfx9.swizzle_mode % 4 == 2; + } else { + surface.u.legacy.pipe_config = metadata.u.legacy.pipe_config; + surface.u.legacy.bankw = metadata.u.legacy.bankw; + surface.u.legacy.bankh = metadata.u.legacy.bankh; + surface.u.legacy.tile_split = metadata.u.legacy.tile_split; + surface.u.legacy.mtilea = metadata.u.legacy.mtilea; + surface.u.legacy.num_banks = metadata.u.legacy.num_banks; + + if (metadata.u.legacy.macrotile == RADEON_LAYOUT_TILED) + array_mode = RADEON_SURF_MODE_2D; + else if (metadata.u.legacy.microtile == RADEON_LAYOUT_TILED) + array_mode = RADEON_SURF_MODE_1D; + else + array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; + + is_scanout = metadata.u.legacy.scanout; + } - r = r600_init_surface(rscreen, &surface, templ, array_mode, false); + r = r600_init_surface(rscreen, &surface, templ, array_mode, stride, + offset, true, is_scanout, false, false); if (r) { return NULL; } - if (metadata.scanout) - surface.flags |= RADEON_SURF_SCANOUT; - - rtex = r600_texture_create_object(screen, templ, stride, - offset, buf, &surface); + rtex = r600_texture_create_object(screen, templ, buf, &surface); if (!rtex) return NULL; - rtex->resource.is_shared = true; + rtex->resource.b.is_shared = true; rtex->resource.external_usage = usage; if (rscreen->apply_opaque_metadata) rscreen->apply_opaque_metadata(rscreen, rtex, &metadata); + /* Validate that addrlib arrived at the same surface parameters. */ + if (rscreen->chip_class >= GFX9) { + assert(metadata.u.gfx9.swizzle_mode == surface.u.gfx9.surf.swizzle_mode); + } + return &rtex->resource.b.b; } @@ -1263,12 +1454,44 @@ bool r600_init_flushed_depth_texture(struct pipe_context *ctx, struct pipe_resource resource; struct r600_texture **flushed_depth_texture = staging ? staging : &rtex->flushed_depth_texture; + enum pipe_format pipe_format = texture->format; + + if (!staging) { + if (rtex->flushed_depth_texture) + return true; /* it's ready */ + + if (!rtex->can_sample_z && rtex->can_sample_s) { + switch (pipe_format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + /* Save memory by not allocating the S plane. */ + pipe_format = PIPE_FORMAT_Z32_FLOAT; + break; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + /* Save memory bandwidth by not copying the + * stencil part during flush. + * + * This potentially increases memory bandwidth + * if an application uses both Z and S texturing + * simultaneously (a flushed Z24S8 texture + * would be stored compactly), but how often + * does that really happen? + */ + pipe_format = PIPE_FORMAT_Z24X8_UNORM; + break; + default:; + } + } else if (!rtex->can_sample_s && rtex->can_sample_z) { + assert(util_format_has_stencil(util_format_description(pipe_format))); - if (!staging && rtex->flushed_depth_texture) - return true; /* it's ready */ + /* DB->CB copies to an 8bpp surface don't work. */ + pipe_format = PIPE_FORMAT_X24S8_UINT; + } + } + memset(&resource, 0, sizeof(resource)); resource.target = texture->target; - resource.format = texture->format; + resource.format = pipe_format; resource.width0 = texture->width0; resource.height0 = texture->height0; resource.depth0 = texture->depth0; @@ -1288,7 +1511,6 @@ bool r600_init_flushed_depth_texture(struct pipe_context *ctx, return false; } - (*flushed_depth_texture)->is_flushing_texture = true; (*flushed_depth_texture)->non_disp_tiling = false; return true; } @@ -1328,7 +1550,7 @@ static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen, { /* r600g doesn't react to dirty_tex_descriptor_counter */ return rscreen->chip_class >= SI && - !rtex->resource.is_shared && + !rtex->resource.b.is_shared && !(transfer_usage & PIPE_TRANSFER_READ) && rtex->resource.b.b.last_level == 0 && util_texrange_covers_whole_level(&rtex->resource.b.b, 0, @@ -1344,18 +1566,16 @@ static void r600_texture_invalidate_storage(struct r600_common_context *rctx, /* There is no point in discarding depth and tiled buffers. */ assert(!rtex->is_depth); - assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED); + assert(rtex->surface.is_linear); /* Reallocate the buffer in the same pipe_resource. */ - r600_init_resource(rscreen, &rtex->resource, rtex->size, - rtex->surface.bo_alignment); + r600_alloc_resource(rscreen, &rtex->resource); /* Initialize the CMASK base address (needed even without CMASK). */ rtex->cmask.base_address_reg = (rtex->resource.gpu_address + rtex->cmask.offset) >> 8; - r600_dirty_all_framebuffer_states(rscreen); - p_atomic_inc(&rscreen->dirty_tex_descriptor_counter); + p_atomic_inc(&rscreen->dirty_tex_counter); rctx->num_alloc_tex_transfer_bytes += rtex->size; } @@ -1376,6 +1596,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, bool use_staging_texture = false; assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER)); + assert(box->width && box->height && box->depth); /* Depth textures use staging unconditionally. */ if (!rtex->is_depth) { @@ -1398,17 +1619,18 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, /* Tiled textures need to be converted into a linear texture for CPU * access. The staging texture is always linear and is placed in GART. * - * Reading from VRAM is slow, always use the staging texture in - * this case. + * Reading from VRAM or GTT WC is slow, always use the staging + * texture in this case. * * Use the staging texture for uploads if the underlying BO * is busy. */ - if (rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) + if (!rtex->surface.is_linear) use_staging_texture = true; else if (usage & PIPE_TRANSFER_READ) - use_staging_texture = (rtex->resource.domains & - RADEON_DOMAIN_VRAM) != 0; + use_staging_texture = + rtex->resource.domains & RADEON_DOMAIN_VRAM || + rtex->resource.flags & RADEON_FLAG_GTT_WC; /* Write & linear only: */ else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf, RADEON_USAGE_READWRITE) || @@ -1426,10 +1648,10 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, trans = CALLOC_STRUCT(r600_transfer); if (!trans) return NULL; - trans->transfer.resource = texture; - trans->transfer.level = level; - trans->transfer.usage = usage; - trans->transfer.box = *box; + pipe_resource_reference(&trans->b.b.resource, texture); + trans->b.b.level = level; + trans->b.b.usage = usage; + trans->b.b.box = *box; if (rtex->is_depth) { struct r600_texture *staging_depth; @@ -1468,8 +1690,12 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, 0, 0, 0, box->depth, 0, 0); pipe_resource_reference(&temp, NULL); } - } - else { + + /* Just get the strides. */ + r600_texture_get_offset(rctx->screen, staging_depth, level, NULL, + &trans->b.b.stride, + &trans->b.b.layer_stride); + } else { /* XXX: only readback the rectangle which is being mapped? */ /* XXX: when discard is true, no need to read back from depth texture */ if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) { @@ -1483,11 +1709,12 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, box->z, box->z + box->depth - 1, 0, 0); - offset = r600_texture_get_offset(staging_depth, level, box); + offset = r600_texture_get_offset(rctx->screen, staging_depth, + level, box, + &trans->b.b.stride, + &trans->b.b.layer_stride); } - trans->transfer.stride = staging_depth->surface.level[level].pitch_bytes; - trans->transfer.layer_stride = staging_depth->surface.level[level].slice_size; trans->staging = (struct r600_resource*)staging_depth; buf = trans->staging; } else if (use_staging_texture) { @@ -1507,8 +1734,11 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, return NULL; } trans->staging = &staging->resource; - trans->transfer.stride = staging->surface.level[0].pitch_bytes; - trans->transfer.layer_stride = staging->surface.level[0].slice_size; + + /* Just get the strides. */ + r600_texture_get_offset(rctx->screen, staging, 0, NULL, + &trans->b.b.stride, + &trans->b.b.layer_stride); if (usage & PIPE_TRANSFER_READ) r600_copy_to_staging_texture(ctx, trans); @@ -1518,9 +1748,9 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, buf = trans->staging; } else { /* the resource is mapped directly */ - trans->transfer.stride = rtex->surface.level[level].pitch_bytes; - trans->transfer.layer_stride = rtex->surface.level[level].slice_size; - offset = r600_texture_get_offset(rtex, level, box); + offset = r600_texture_get_offset(rctx->screen, rtex, level, box, + &trans->b.b.stride, + &trans->b.b.layer_stride); buf = &rtex->resource; } @@ -1530,7 +1760,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, return NULL; } - *ptransfer = &trans->transfer; + *ptransfer = &trans->b.b; return map + offset; } @@ -1576,6 +1806,7 @@ static void r600_texture_transfer_unmap(struct pipe_context *ctx, rctx->num_alloc_tex_transfer_bytes = 0; } + pipe_resource_reference(&transfer->resource, NULL); FREE(transfer); } @@ -1586,15 +1817,123 @@ static const struct u_resource_vtbl r600_texture_vtbl = r600_texture_transfer_map, /* transfer_map */ u_default_transfer_flush_region, /* transfer_flush_region */ r600_texture_transfer_unmap, /* transfer_unmap */ - NULL /* transfer_inline_write */ }; +/* DCC channel type categories within which formats can be reinterpreted + * while keeping the same DCC encoding. The swizzle must also match. */ +enum dcc_channel_type { + dcc_channel_float32, + dcc_channel_uint32, + dcc_channel_sint32, + dcc_channel_float16, + dcc_channel_uint16, + dcc_channel_sint16, + dcc_channel_uint_10_10_10_2, + dcc_channel_uint8, + dcc_channel_sint8, + dcc_channel_incompatible, +}; + +/* Return the type of DCC encoding. */ +static enum dcc_channel_type +vi_get_dcc_channel_type(const struct util_format_description *desc) +{ + int i; + + /* Find the first non-void channel. */ + for (i = 0; i < desc->nr_channels; i++) + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) + break; + if (i == desc->nr_channels) + return dcc_channel_incompatible; + + switch (desc->channel[i].size) { + case 32: + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) + return dcc_channel_float32; + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) + return dcc_channel_uint32; + return dcc_channel_sint32; + case 16: + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) + return dcc_channel_float16; + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) + return dcc_channel_uint16; + return dcc_channel_sint16; + case 10: + return dcc_channel_uint_10_10_10_2; + case 8: + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) + return dcc_channel_uint8; + return dcc_channel_sint8; + default: + return dcc_channel_incompatible; + } +} + +/* Return if it's allowed to reinterpret one format as another with DCC enabled. */ +bool vi_dcc_formats_compatible(enum pipe_format format1, + enum pipe_format format2) +{ + const struct util_format_description *desc1, *desc2; + enum dcc_channel_type type1, type2; + int i; + + if (format1 == format2) + return true; + + desc1 = util_format_description(format1); + desc2 = util_format_description(format2); + + if (desc1->nr_channels != desc2->nr_channels) + return false; + + /* Swizzles must be the same. */ + for (i = 0; i < desc1->nr_channels; i++) + if (desc1->swizzle[i] <= PIPE_SWIZZLE_W && + desc2->swizzle[i] <= PIPE_SWIZZLE_W && + desc1->swizzle[i] != desc2->swizzle[i]) + return false; + + type1 = vi_get_dcc_channel_type(desc1); + type2 = vi_get_dcc_channel_type(desc2); + + return type1 != dcc_channel_incompatible && + type2 != dcc_channel_incompatible && + type1 == type2; +} + +bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex, + unsigned level, + enum pipe_format view_format) +{ + struct r600_texture *rtex = (struct r600_texture *)tex; + + return vi_dcc_enabled(rtex, level) && + !vi_dcc_formats_compatible(tex->format, view_format); +} + +/* This can't be merged with the above function, because + * vi_dcc_formats_compatible should be called only when DCC is enabled. */ +void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx, + struct pipe_resource *tex, + unsigned level, + enum pipe_format view_format) +{ + struct r600_texture *rtex = (struct r600_texture *)tex; + + if (vi_dcc_enabled(rtex, level) && + !vi_dcc_formats_compatible(tex->format, view_format)) + if (!r600_texture_disable_dcc(rctx, (struct r600_texture*)tex)) + rctx->decompress_dcc(&rctx->b, rtex); +} + struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_surface *templ, + unsigned width0, unsigned height0, unsigned width, unsigned height) { - struct r600_texture *rtex = (struct r600_texture*)texture; struct r600_surface *surface = CALLOC_STRUCT(r600_surface); if (!surface) @@ -1610,7 +1949,14 @@ struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe, surface->base.width = width; surface->base.height = height; surface->base.u = templ->u; - surface->level_info = &rtex->surface.level[templ->u.tex.level]; + + surface->width0 = width0; + surface->height0 = height0; + + surface->dcc_incompatible = + texture->target != PIPE_BUFFER && + vi_dcc_formats_are_incompatible(texture, templ->u.tex.level, + templ->format); return &surface->base; } @@ -1642,7 +1988,9 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, } } - return r600_create_surface_custom(pipe, tex, templ, width, height); + return r600_create_surface_custom(pipe, tex, templ, + tex->width0, tex->height0, + width, height); } static void r600_surface_destroy(struct pipe_context *pipe, @@ -1655,6 +2003,71 @@ static void r600_surface_destroy(struct pipe_context *pipe, FREE(surface); } +static void r600_clear_texture(struct pipe_context *pipe, + struct pipe_resource *tex, + unsigned level, + const struct pipe_box *box, + const void *data) +{ + struct pipe_screen *screen = pipe->screen; + struct r600_texture *rtex = (struct r600_texture*)tex; + struct pipe_surface tmpl = {{0}}; + struct pipe_surface *sf; + const struct util_format_description *desc = + util_format_description(tex->format); + + tmpl.format = tex->format; + tmpl.u.tex.first_layer = box->z; + tmpl.u.tex.last_layer = box->z + box->depth - 1; + tmpl.u.tex.level = level; + sf = pipe->create_surface(pipe, tex, &tmpl); + if (!sf) + return; + + if (rtex->is_depth) { + unsigned clear; + float depth; + uint8_t stencil = 0; + + /* Depth is always present. */ + clear = PIPE_CLEAR_DEPTH; + desc->unpack_z_float(&depth, 0, data, 0, 1, 1); + + if (rtex->surface.flags & RADEON_SURF_SBUFFER) { + clear |= PIPE_CLEAR_STENCIL; + desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1); + } + + pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil, + box->x, box->y, + box->width, box->height, false); + } else { + union pipe_color_union color; + + /* pipe_color_union requires the full vec4 representation. */ + if (util_format_is_pure_uint(tex->format)) + desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1); + else if (util_format_is_pure_sint(tex->format)) + desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1); + else + desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1); + + if (screen->is_format_supported(screen, tex->format, + tex->target, 0, + PIPE_BIND_RENDER_TARGET)) { + pipe->clear_render_target(pipe, sf, &color, + box->x, box->y, + box->width, box->height, false); + } else { + /* Software fallback - just for R9G9B9E5_FLOAT */ + util_clear_render_target(pipe, sf, &color, + box->x, box->y, + box->width, box->height); + } + } + pipe_surface_reference(&sf, NULL); +} + unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap) { const struct util_format_description *desc = util_format_description(format); @@ -1717,6 +2130,25 @@ unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap) /* PIPELINE_STAT-BASED DCC ENABLEMENT FOR DISPLAYABLE SURFACES */ +static void vi_dcc_clean_up_context_slot(struct r600_common_context *rctx, + int slot) +{ + int i; + + if (rctx->dcc_stats[slot].query_active) + vi_separate_dcc_stop_query(&rctx->b, + rctx->dcc_stats[slot].tex); + + for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats[slot].ps_stats); i++) + if (rctx->dcc_stats[slot].ps_stats[i]) { + rctx->b.destroy_query(&rctx->b, + rctx->dcc_stats[slot].ps_stats[i]); + rctx->dcc_stats[slot].ps_stats[i] = NULL; + } + + r600_texture_reference(&rctx->dcc_stats[slot].tex, NULL); +} + /** * Return the per-context slot where DCC statistics queries for the texture live. */ @@ -1725,6 +2157,13 @@ static unsigned vi_get_context_dcc_stats_index(struct r600_common_context *rctx, { int i, empty_slot = -1; + /* Remove zombie textures (textures kept alive by this array only). */ + for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) + if (rctx->dcc_stats[i].tex && + rctx->dcc_stats[i].tex->resource.b.b.reference.count == 1) + vi_dcc_clean_up_context_slot(rctx, i); + + /* Find the texture. */ for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) { /* Return if found. */ if (rctx->dcc_stats[i].tex == tex) { @@ -1748,25 +2187,12 @@ static unsigned vi_get_context_dcc_stats_index(struct r600_common_context *rctx, oldest_slot = i; /* Clean up the oldest slot. */ - if (rctx->dcc_stats[oldest_slot].query_active) - vi_separate_dcc_stop_query(&rctx->b, - rctx->dcc_stats[oldest_slot].tex); - - for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats[oldest_slot].ps_stats); i++) - if (rctx->dcc_stats[oldest_slot].ps_stats[i]) { - rctx->b.destroy_query(&rctx->b, - rctx->dcc_stats[oldest_slot].ps_stats[i]); - rctx->dcc_stats[oldest_slot].ps_stats[i] = NULL; - } - - pipe_resource_reference((struct pipe_resource**) - &rctx->dcc_stats[oldest_slot].tex, NULL); + vi_dcc_clean_up_context_slot(rctx, oldest_slot); empty_slot = oldest_slot; } /* Add the texture to the new slot. */ - pipe_resource_reference((struct pipe_resource**)&rctx->dcc_stats[empty_slot].tex, - &tex->resource.b.b); + r600_texture_reference(&rctx->dcc_stats[empty_slot].tex, tex); rctx->dcc_stats[empty_slot].last_use_timestamp = os_time_get(); return empty_slot; } @@ -1831,20 +2257,26 @@ static void vi_separate_dcc_try_enable(struct r600_common_context *rctx, /* The intent is to use this with shared displayable back buffers, * but it's not strictly limited only to them. */ - if (!tex->resource.is_shared || + if (!tex->resource.b.is_shared || !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) || tex->resource.b.b.target != PIPE_TEXTURE_2D || - tex->surface.last_level > 0 || + tex->resource.b.b.last_level > 0 || !tex->surface.dcc_size) return; if (tex->dcc_offset) return; /* already enabled */ + /* Enable the DCC stat gathering. */ + if (!tex->dcc_gather_statistics) { + tex->dcc_gather_statistics = true; + vi_separate_dcc_start_query(&rctx->b, tex); + } + if (!vi_should_enable_separate_dcc(tex)) return; /* stats show that DCC decompression is too expensive */ - assert(tex->surface.level[0].dcc_enabled); + assert(tex->surface.num_dcc_levels); assert(!tex->dcc_separate_buffer); r600_texture_discard_cmask(rctx->screen, tex); @@ -1857,16 +2289,13 @@ static void vi_separate_dcc_try_enable(struct r600_common_context *rctx, tex->last_dcc_separate_buffer = NULL; } else { tex->dcc_separate_buffer = (struct r600_resource*) - r600_aligned_buffer_create(rctx->b.screen, 0, + r600_aligned_buffer_create(rctx->b.screen, + R600_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, tex->surface.dcc_size, tex->surface.dcc_alignment); if (!tex->dcc_separate_buffer) return; - - /* Enabling for the first time, so start the query. */ - tex->dcc_gather_statistics = true; - vi_separate_dcc_start_query(&rctx->b, tex); } /* dcc_offset is the absolute GPUVM address. */ @@ -1885,6 +2314,7 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, struct r600_texture *tex) { struct r600_common_context *rctx = (struct r600_common_context*)ctx; + struct pipe_query *tmp; unsigned i = vi_get_context_dcc_stats_index(rctx, tex); bool query_active = rctx->dcc_stats[i].query_active; bool disable = false; @@ -1895,13 +2325,15 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, /* Read the results. */ ctx->get_query_result(ctx, rctx->dcc_stats[i].ps_stats[2], true, &result); - ctx->destroy_query(ctx, rctx->dcc_stats[i].ps_stats[2]); - rctx->dcc_stats[i].ps_stats[2] = NULL; + r600_query_hw_reset_buffers(rctx, + (struct r600_query_hw*) + rctx->dcc_stats[i].ps_stats[2]); /* Compute the approximate number of fullscreen draws. */ tex->ps_draw_ratio = result.pipeline_statistics.ps_invocations / (tex->resource.b.b.width0 * tex->resource.b.b.height0); + rctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio; disable = tex->dcc_separate_buffer && !vi_should_enable_separate_dcc(tex); @@ -1914,9 +2346,10 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, vi_separate_dcc_stop_query(ctx, tex); /* Move the queries in the queue by one. */ + tmp = rctx->dcc_stats[i].ps_stats[2]; rctx->dcc_stats[i].ps_stats[2] = rctx->dcc_stats[i].ps_stats[1]; rctx->dcc_stats[i].ps_stats[1] = rctx->dcc_stats[i].ps_stats[0]; - rctx->dcc_stats[i].ps_stats[0] = NULL; + rctx->dcc_stats[i].ps_stats[0] = tmp; /* create and start a new query as ps_stats[0] */ if (query_active) @@ -1943,7 +2376,16 @@ static void evergreen_set_clear_color(struct r600_texture *rtex, memset(&uc, 0, sizeof(uc)); - if (util_format_is_pure_uint(surface_format)) { + if (rtex->surface.bpe == 16) { + /* DCC fast clear only: + * CLEAR_WORD0 = R = G = B + * CLEAR_WORD1 = A + */ + assert(color->ui[0] == color->ui[1] && + color->ui[0] == color->ui[2]); + uc.ui[0] = color->ui[0]; + uc.ui[1] = color->ui[3]; + } else if (util_format_is_pure_uint(surface_format)) { util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1); } else if (util_format_is_pure_sint(surface_format)) { util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1); @@ -1954,7 +2396,7 @@ static void evergreen_set_clear_color(struct r600_texture *rtex, memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)); } -static void vi_get_fast_clear_parameters(enum pipe_format surface_format, +static bool vi_get_fast_clear_parameters(enum pipe_format surface_format, const union pipe_color_union *color, uint32_t* reset_value, bool* clear_words_needed) @@ -1966,6 +2408,11 @@ static void vi_get_fast_clear_parameters(enum pipe_format surface_format, int extra_channel; const struct util_format_description *desc = util_format_description(surface_format); + if (desc->block.bits == 128 && + (color->ui[0] != color->ui[1] || + color->ui[0] != color->ui[2])) + return false; + *clear_words_needed = true; *reset_value = 0x20202020U; @@ -1985,7 +2432,7 @@ static void vi_get_fast_clear_parameters(enum pipe_format surface_format, else extra_channel = 0; } else - return; + return true; for (i = 0; i < 4; ++i) { int index = desc->swizzle[i] - PIPE_SWIZZLE_X; @@ -1994,18 +2441,26 @@ static void vi_get_fast_clear_parameters(enum pipe_format surface_format, desc->swizzle[i] > PIPE_SWIZZLE_W) continue; - if (util_format_is_pure_sint(surface_format)) { + if (desc->channel[i].pure_integer && + desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + /* Use the maximum value for clamping the clear color. */ + int max = u_bit_consecutive(0, desc->channel[i].size - 1); + values[i] = color->i[i] != 0; - if (color->i[i] != 0 && color->i[i] != INT32_MAX) - return; - } else if (util_format_is_pure_uint(surface_format)) { + if (color->i[i] != 0 && MIN2(color->i[i], max) != max) + return true; + } else if (desc->channel[i].pure_integer && + desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { + /* Use the maximum value for clamping the clear color. */ + unsigned max = u_bit_consecutive(0, desc->channel[i].size); + values[i] = color->ui[i] != 0U; - if (color->ui[i] != 0U && color->ui[i] != UINT32_MAX) - return; + if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max) + return true; } else { values[i] = color->f[i] != 0.0F; if (color->f[i] != 0.0F && color->f[i] != 1.0F) - return; + return true; } if (index == extra_channel) @@ -2019,7 +2474,7 @@ static void vi_get_fast_clear_parameters(enum pipe_format surface_format, desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel && desc->swizzle[i] >= PIPE_SWIZZLE_X && desc->swizzle[i] <= PIPE_SWIZZLE_W) - return; + return true; *clear_words_needed = false; if (main_value) @@ -2027,6 +2482,7 @@ static void vi_get_fast_clear_parameters(enum pipe_format surface_format, if (extra_value) *reset_value |= 0x40404040U; + return true; } void vi_dcc_clear_level(struct r600_common_context *rctx, @@ -2034,9 +2490,9 @@ void vi_dcc_clear_level(struct r600_common_context *rctx, unsigned level, unsigned clear_value) { struct pipe_resource *dcc_buffer; - uint64_t dcc_offset; + uint64_t dcc_offset, clear_size; - assert(rtex->dcc_offset && rtex->surface.level[level].dcc_enabled); + assert(vi_dcc_enabled(rtex, level)); if (rtex->dcc_separate_buffer) { dcc_buffer = &rtex->dcc_separate_buffer->b.b; @@ -2046,10 +2502,18 @@ void vi_dcc_clear_level(struct r600_common_context *rctx, dcc_offset = rtex->dcc_offset; } - dcc_offset += rtex->surface.level[level].dcc_offset; + if (rctx->chip_class >= GFX9) { + /* Mipmap level clears aren't implemented. */ + assert(rtex->resource.b.b.last_level == 0); + /* MSAA needs a different clear size. */ + assert(rtex->resource.b.b.nr_samples <= 1); + clear_size = rtex->surface.dcc_size; + } else { + dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset; + clear_size = rtex->surface.u.legacy.level[level].dcc_fast_clear_size; + } - rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset, - rtex->surface.level[level].dcc_fast_clear_size, + rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset, clear_size, clear_value, R600_COHERENCY_CB_META); } @@ -2060,28 +2524,60 @@ void vi_dcc_clear_level(struct r600_common_context *rctx, static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen, struct r600_texture *rtex) { - if (rtex->resource.is_shared || - rtex->surface.nsamples <= 1 || + if (rtex->resource.b.is_shared || + rtex->resource.b.b.nr_samples <= 1 || rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode) return; - assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_2D); - assert(rtex->surface.last_level == 0); + assert(rscreen->chip_class >= GFX9 || + rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D); + assert(rtex->resource.b.b.last_level == 0); - /* These magic numbers were copied from addrlib. It doesn't use any - * definitions for them either. They are all 2D_TILED_THIN1 modes with - * different bpp and micro tile mode. - */ - if (rscreen->chip_class >= CIK) { + if (rscreen->chip_class >= GFX9) { + /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */ + assert(rtex->surface.u.gfx9.surf.swizzle_mode >= 4); + + /* If you do swizzle_mode % 4, you'll get: + * 0 = Depth + * 1 = Standard, + * 2 = Displayable + * 3 = Rotated + * + * Depth-sample order isn't allowed: + */ + assert(rtex->surface.u.gfx9.surf.swizzle_mode % 4 != 0); + + switch (rtex->last_msaa_resolve_target_micro_mode) { + case RADEON_MICRO_MODE_DISPLAY: + rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3; + rtex->surface.u.gfx9.surf.swizzle_mode += 2; /* D */ + break; + case RADEON_MICRO_MODE_THIN: + rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3; + rtex->surface.u.gfx9.surf.swizzle_mode += 1; /* S */ + break; + case RADEON_MICRO_MODE_ROTATED: + rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3; + rtex->surface.u.gfx9.surf.swizzle_mode += 3; /* R */ + break; + default: /* depth */ + assert(!"unexpected micro mode"); + return; + } + } else if (rscreen->chip_class >= CIK) { + /* These magic numbers were copied from addrlib. It doesn't use + * any definitions for them either. They are all 2D_TILED_THIN1 + * modes with different bpp and micro tile mode. + */ switch (rtex->last_msaa_resolve_target_micro_mode) { - case 0: /* displayable */ - rtex->surface.tiling_index[0] = 10; + case RADEON_MICRO_MODE_DISPLAY: + rtex->surface.u.legacy.tiling_index[0] = 10; break; - case 1: /* thin */ - rtex->surface.tiling_index[0] = 14; + case RADEON_MICRO_MODE_THIN: + rtex->surface.u.legacy.tiling_index[0] = 14; break; - case 3: /* rotated */ - rtex->surface.tiling_index[0] = 28; + case RADEON_MICRO_MODE_ROTATED: + rtex->surface.u.legacy.tiling_index[0] = 28; break; default: /* depth, thick */ assert(!"unexpected micro mode"); @@ -2089,32 +2585,32 @@ static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen, } } else { /* SI */ switch (rtex->last_msaa_resolve_target_micro_mode) { - case 0: /* displayable */ + case RADEON_MICRO_MODE_DISPLAY: switch (rtex->surface.bpe) { - case 8: - rtex->surface.tiling_index[0] = 10; + case 1: + rtex->surface.u.legacy.tiling_index[0] = 10; break; - case 16: - rtex->surface.tiling_index[0] = 11; + case 2: + rtex->surface.u.legacy.tiling_index[0] = 11; break; - default: /* 32, 64 */ - rtex->surface.tiling_index[0] = 12; + default: /* 4, 8 */ + rtex->surface.u.legacy.tiling_index[0] = 12; break; } break; - case 1: /* thin */ + case RADEON_MICRO_MODE_THIN: switch (rtex->surface.bpe) { - case 8: - rtex->surface.tiling_index[0] = 14; + case 1: + rtex->surface.u.legacy.tiling_index[0] = 14; break; - case 16: - rtex->surface.tiling_index[0] = 15; + case 2: + rtex->surface.u.legacy.tiling_index[0] = 15; break; - case 32: - rtex->surface.tiling_index[0] = 16; + case 4: + rtex->surface.u.legacy.tiling_index[0] = 16; break; - default: /* 64, 128 */ - rtex->surface.tiling_index[0] = 17; + default: /* 8, 16 */ + rtex->surface.u.legacy.tiling_index[0] = 17; break; } break; @@ -2126,8 +2622,7 @@ static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen, rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode; - p_atomic_inc(&rscreen->dirty_fb_counter); - p_atomic_inc(&rscreen->dirty_tex_descriptor_counter); + p_atomic_inc(&rscreen->dirty_tex_counter); } void evergreen_do_fast_color_clear(struct r600_common_context *rctx, @@ -2159,11 +2654,6 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, tex = (struct r600_texture *)fb->cbufs[i]->texture; - /* 128-bit formats are unusupported */ - if (util_format_get_blocksizebits(fb->cbufs[i]->format) > 64) { - continue; - } - /* the clear is allowed if all layers are bound */ if (fb->cbufs[i]->u.tex.first_layer != 0 || fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) { @@ -2176,7 +2666,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, } /* only supported on tiled surfaces */ - if (tex->surface.level[0].mode < RADEON_SURF_MODE_1D) { + if (tex->surface.is_linear) { continue; } @@ -2184,13 +2674,13 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, * because there is no way to communicate the clear color among * all clients */ - if (tex->resource.is_shared && + if (tex->resource.b.is_shared && !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) continue; /* fast color clear with 1D tiling doesn't work on old kernels and CIK */ - if (tex->surface.level[0].mode == RADEON_SURF_MODE_1D && - rctx->chip_class >= CIK && + if (rctx->chip_class == CIK && + tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D && rctx->screen->info.drm_major == 2 && rctx->screen->info.drm_minor < 38) { continue; @@ -2199,12 +2689,14 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, /* Fast clear is the most appropriate place to enable DCC for * displayable surfaces. */ - if (rctx->chip_class >= VI) { + if (rctx->chip_class >= VI && + !(rctx->screen->debug_flags & DBG_NO_DCC_FB)) { vi_separate_dcc_try_enable(rctx, tex); - /* Stoney can't do a CMASK-based clear, so all clears are - * considered to be hypothetically slow clears, which - * is weighed when determining to enable separate DCC. + /* RB+ isn't supported with a CMASK clear only on Stoney, + * so all clears are considered to be hypothetically slow + * clears, which is weighed when determining whether to + * enable separate DCC. */ if (tex->dcc_gather_statistics && rctx->family == CHIP_STONEY) @@ -2212,25 +2704,41 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, } /* Try to clear DCC first, otherwise try CMASK. */ - if (tex->dcc_offset && tex->surface.level[0].dcc_enabled) { + if (vi_dcc_enabled(tex, 0)) { uint32_t reset_value; bool clear_words_needed; + /* TODO: fix DCC clear */ + if (rctx->chip_class >= GFX9) + continue; + if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR) continue; - /* We can change the micro tile mode before a full clear. */ - if (rctx->screen->chip_class >= SI) - si_set_optimal_micro_tile_mode(rctx->screen, tex); + if (!vi_get_fast_clear_parameters(fb->cbufs[i]->format, + color, &reset_value, + &clear_words_needed)) + continue; - vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed); vi_dcc_clear_level(rctx, tex, 0, reset_value); - if (clear_words_needed) - tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; + unsigned level_bit = 1 << fb->cbufs[i]->u.tex.level; + if (clear_words_needed) { + bool need_compressed_update = !tex->dirty_level_mask; + + tex->dirty_level_mask |= level_bit; + + if (need_compressed_update) + p_atomic_inc(&rctx->screen->compressed_colortex_counter); + } tex->separate_dcc_dirty = true; } else { - /* Stoney/RB+ doesn't work with CMASK fast clear. */ + /* 128-bit formats are unusupported */ + if (tex->surface.bpe > 8) { + continue; + } + + /* RB+ doesn't work with CMASK fast clear on Stoney. */ if (rctx->family == CHIP_STONEY) continue; @@ -2240,18 +2748,23 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, continue; } - /* We can change the micro tile mode before a full clear. */ - if (rctx->screen->chip_class >= SI) - si_set_optimal_micro_tile_mode(rctx->screen, tex); - /* Do the fast clear. */ rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, tex->cmask.offset, tex->cmask.size, 0, R600_COHERENCY_CB_META); + bool need_compressed_update = !tex->dirty_level_mask; + tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; + + if (need_compressed_update) + p_atomic_inc(&rctx->screen->compressed_colortex_counter); } + /* We can change the micro tile mode before a full clear. */ + if (rctx->screen->chip_class >= SI) + si_set_optimal_micro_tile_mode(rctx->screen, tex); + evergreen_set_clear_color(tex, fb->cbufs[i]->format, color); if (dirty_cbufs) @@ -2271,4 +2784,5 @@ void r600_init_context_texture_functions(struct r600_common_context *rctx) { rctx->b.create_surface = r600_create_surface; rctx->b.surface_destroy = r600_surface_destroy; + rctx->b.clear_texture = r600_clear_texture; }