X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_texture.c;h=014391142411070aba893afb853e41d406e95ac6;hb=e2e700f6053d0b16ba46e4d5c5b20e965fb2224e;hp=4f7744a887df3b4756e6859c670d71303b9f537f;hpb=d7008fe46a8f689ce4ee2b14b61dc39baebccaa8;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 4f7744a887d..01439114241 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -27,7 +27,7 @@ #include "si_pipe.h" #include "si_query.h" #include "sid.h" -#include "state_tracker/drm_driver.h" +#include "frontend/drm_driver.h" #include "util/format/u_format.h" #include "util/os_time.h" #include "util/u_log.h" @@ -74,6 +74,13 @@ bool si_prepare_for_dma_blit(struct si_context *sctx, struct si_texture *dst, un if (vi_dcc_enabled(src, src_level) || vi_dcc_enabled(dst, dst_level)) return false; + /* TMZ: mixing encrypted and non-encrypted buffer in a single command + * doesn't seem supported. + */ + if ((src->buffer.flags & RADEON_FLAG_ENCRYPTED) != + (dst->buffer.flags & RADEON_FLAG_ENCRYPTED)) + return false; + /* CMASK as: * src: Both texture and SDMA paths need decompression. Use SDMA. * dst: If overwriting the whole texture, discard CMASK and use @@ -209,8 +216,8 @@ static unsigned si_texture_get_offset(struct si_screen *sscreen, struct si_textu static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surface, const struct pipe_resource *ptex, enum radeon_surf_mode array_mode, - unsigned pitch_in_bytes_override, bool is_imported, bool is_scanout, - bool is_flushed_depth, bool tc_compatible_htile) + bool is_imported, bool is_scanout, bool is_flushed_depth, + bool tc_compatible_htile) { const struct util_format_description *desc = util_format_description(ptex->format); bool is_depth, is_stencil; @@ -250,7 +257,9 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac } if (sscreen->info.chip_class >= GFX8 && - (ptex->flags & SI_RESOURCE_FLAG_DISABLE_DCC || ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT || + (ptex->flags & SI_RESOURCE_FLAG_DISABLE_DCC || + (sscreen->info.chip_class < GFX10_3 && + ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT) || (ptex->nr_samples >= 2 && !sscreen->dcc_msaa_allowed))) flags |= RADEON_SURF_DISABLE_DCC; @@ -291,8 +300,6 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac flags |= RADEON_SURF_SHAREABLE; if (is_imported) flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE; - if (!(ptex->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING)) - flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE; if (sscreen->debug_flags & DBG(NO_FMASK)) flags |= RADEON_SURF_NO_FMASK; @@ -301,9 +308,11 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac surface->micro_tile_mode = SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(ptex->flags); } - if (sscreen->info.chip_class >= GFX10 && (ptex->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING)) { + if (ptex->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING) { flags |= RADEON_SURF_FORCE_SWIZZLE_MODE; - surface->u.gfx9.surf.swizzle_mode = ADDR_SW_64KB_R_X; + + if (sscreen->info.chip_class >= GFX10) + surface->u.gfx9.surf.swizzle_mode = ADDR_SW_64KB_R_X; } r = sscreen->ws->surface_init(sscreen->ws, ptex, flags, bpe, array_mode, surface); @@ -311,62 +320,11 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac return r; } - unsigned pitch = pitch_in_bytes_override / bpe; - - if (sscreen->info.chip_class >= GFX9) { - if (pitch) { - surface->u.gfx9.surf_pitch = pitch; - if (ptex->last_level == 0) - surface->u.gfx9.surf.epitch = pitch - 1; - surface->u.gfx9.surf_slice_size = (uint64_t)pitch * surface->u.gfx9.surf_height * bpe; - } - } else { - if (pitch) { - surface->u.legacy.level[0].nblk_x = pitch; - surface->u.legacy.level[0].slice_size_dw = - ((uint64_t)pitch * surface->u.legacy.level[0].nblk_y * bpe) / 4; - } - } return 0; } -static void si_get_display_metadata(struct si_screen *sscreen, struct radeon_surf *surf, - struct radeon_bo_metadata *metadata, - enum radeon_surf_mode *array_mode, bool *is_scanout) -{ - if (sscreen->info.chip_class >= GFX9) { - if (metadata->u.gfx9.swizzle_mode > 0) - *array_mode = RADEON_SURF_MODE_2D; - else - *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; - - surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode; - *is_scanout = metadata->u.gfx9.scanout; - - if (metadata->u.gfx9.dcc_offset_256B) { - surf->u.gfx9.display_dcc_pitch_max = metadata->u.gfx9.dcc_pitch_max; - assert(metadata->u.gfx9.dcc_independent_64B == 1); - } - } else { - surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config; - surf->u.legacy.bankw = metadata->u.legacy.bankw; - surf->u.legacy.bankh = metadata->u.legacy.bankh; - surf->u.legacy.tile_split = metadata->u.legacy.tile_split; - surf->u.legacy.mtilea = metadata->u.legacy.mtilea; - surf->u.legacy.num_banks = metadata->u.legacy.num_banks; - - if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED) - *array_mode = RADEON_SURF_MODE_2D; - else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED) - *array_mode = RADEON_SURF_MODE_1D; - else - *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; - - *is_scanout = metadata->u.legacy.scanout; - } -} - -void si_eliminate_fast_color_clear(struct si_context *sctx, struct si_texture *tex) +void si_eliminate_fast_color_clear(struct si_context *sctx, struct si_texture *tex, + bool *ctx_flushed) { struct si_screen *sscreen = sctx->screen; struct pipe_context *ctx = &sctx->b; @@ -378,8 +336,14 @@ void si_eliminate_fast_color_clear(struct si_context *sctx, struct si_texture *t ctx->flush_resource(ctx, &tex->buffer.b.b); /* Flush only if any fast clear elimination took place. */ + bool flushed = false; if (n != sctx->num_decompress_calls) + { ctx->flush(ctx, NULL, 0); + flushed = true; + } + if (ctx_flushed) + *ctx_flushed = flushed; if (ctx == sscreen->aux_context) simple_mtx_unlock(&sscreen->aux_context_lock); @@ -416,13 +380,6 @@ static bool si_can_disable_dcc(struct si_texture *tex) !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE)); } -static void si_texture_zero_dcc_fields(struct si_texture *tex) -{ - tex->surface.dcc_offset = 0; - tex->surface.display_dcc_offset = 0; - tex->surface.dcc_retile_map_offset = 0; -} - static bool si_texture_discard_dcc(struct si_screen *sscreen, struct si_texture *tex) { if (!si_can_disable_dcc(tex)) @@ -431,7 +388,7 @@ static bool si_texture_discard_dcc(struct si_screen *sscreen, struct si_texture assert(tex->dcc_separate_buffer == NULL); /* Disable DCC. */ - si_texture_zero_dcc_fields(tex); + ac_surface_zero_dcc_fields(&tex->surface); /* Notify all contexts about the change. */ p_atomic_inc(&sscreen->dirty_tex_counter); @@ -591,67 +548,16 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex p_atomic_inc(&sctx->screen->dirty_tex_counter); } -static uint32_t si_get_bo_metadata_word1(struct si_screen *sscreen) -{ - return (ATI_VENDOR_ID << 16) | sscreen->info.pci_id; -} - static void si_set_tex_bo_metadata(struct si_screen *sscreen, struct si_texture *tex) { - struct radeon_surf *surface = &tex->surface; struct pipe_resource *res = &tex->buffer.b.b; struct radeon_bo_metadata md; memset(&md, 0, sizeof(md)); - if (sscreen->info.chip_class >= GFX9) { - md.u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode; - md.u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; - - if (tex->surface.dcc_offset && !tex->dcc_separate_buffer) { - uint64_t dcc_offset = tex->surface.display_dcc_offset ? tex->surface.display_dcc_offset - : tex->surface.dcc_offset; - - assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24)); - md.u.gfx9.dcc_offset_256B = dcc_offset >> 8; - md.u.gfx9.dcc_pitch_max = tex->surface.u.gfx9.display_dcc_pitch_max; - md.u.gfx9.dcc_independent_64B = 1; - } - } else { - md.u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D - ? RADEON_LAYOUT_TILED - : RADEON_LAYOUT_LINEAR; - md.u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D - ? RADEON_LAYOUT_TILED - : RADEON_LAYOUT_LINEAR; - md.u.legacy.pipe_config = surface->u.legacy.pipe_config; - md.u.legacy.bankw = surface->u.legacy.bankw; - md.u.legacy.bankh = surface->u.legacy.bankh; - md.u.legacy.tile_split = surface->u.legacy.tile_split; - md.u.legacy.mtilea = surface->u.legacy.mtilea; - md.u.legacy.num_banks = surface->u.legacy.num_banks; - md.u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe; - md.u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; - } - assert(tex->dcc_separate_buffer == NULL); assert(tex->surface.fmask_size == 0); - /* Metadata image format format version 1: - * [0] = 1 (metadata format identifier) - * [1] = (VENDOR_ID << 16) | PCI_ID - * [2:9] = image descriptor for the whole resource - * [2] is always 0, because the base address is cleared - * [9] is the DCC offset bits [39:8] from the beginning of - * the buffer - * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level - */ - - md.metadata[0] = 1; /* metadata image format version 1 */ - - /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ - md.metadata[1] = si_get_bo_metadata_word1(sscreen); - static const unsigned char swizzle[] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W}; bool is_array = util_texture_is_array(res->target); @@ -660,128 +566,13 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen, struct si_texture sscreen->make_texture_descriptor(sscreen, tex, true, res->target, res->format, swizzle, 0, res->last_level, 0, is_array ? res->array_size - 1 : 0, res->width0, res->height0, res->depth0, desc, NULL); - si_set_mutable_tex_desc_fields(sscreen, tex, &tex->surface.u.legacy.level[0], 0, 0, - tex->surface.blk_w, false, desc); - - /* Clear the base address and set the relative DCC offset. */ - desc[0] = 0; - desc[1] &= C_008F14_BASE_ADDRESS_HI; - - switch (sscreen->info.chip_class) { - case GFX6: - case GFX7: - break; - case GFX8: - desc[7] = tex->surface.dcc_offset >> 8; - break; - case GFX9: - desc[7] = tex->surface.dcc_offset >> 8; - desc[5] &= C_008F24_META_DATA_ADDRESS; - desc[5] |= S_008F24_META_DATA_ADDRESS(tex->surface.dcc_offset >> 40); - break; - case GFX10: - desc[6] &= C_00A018_META_DATA_ADDRESS_LO; - desc[6] |= S_00A018_META_DATA_ADDRESS_LO(tex->surface.dcc_offset >> 8); - desc[7] = tex->surface.dcc_offset >> 16; - break; - default: - assert(0); - } - - /* Dwords [2:9] contain the image descriptor. */ - memcpy(&md.metadata[2], desc, sizeof(desc)); - md.size_metadata = 10 * 4; - - /* Dwords [10:..] contain the mipmap level offsets. */ - if (sscreen->info.chip_class <= GFX8) { - for (unsigned i = 0; i <= res->last_level; i++) - md.metadata[10 + i] = tex->surface.u.legacy.level[i].offset >> 8; - - md.size_metadata += (1 + res->last_level) * 4; - } + tex->surface.blk_w, false, false, desc); - sscreen->ws->buffer_set_metadata(tex->buffer.buf, &md); -} - -static bool si_read_tex_bo_metadata(struct si_screen *sscreen, struct si_texture *tex, - uint64_t offset, struct radeon_bo_metadata *md) -{ - uint32_t *desc = &md->metadata[2]; - - if (offset || /* Non-zero planes ignore metadata. */ - md->size_metadata < 10 * 4 || /* at least 2(header) + 8(desc) dwords */ - md->metadata[0] == 0 || /* invalid version number */ - md->metadata[1] != si_get_bo_metadata_word1(sscreen)) /* invalid PCI ID */ { - /* Disable DCC because it might not be enabled. */ - si_texture_zero_dcc_fields(tex); - - /* Don't report an error if the texture comes from an incompatible driver, - * but this might not work. - */ - return true; - } - - /* Validate that sample counts and the number of mipmap levels match. */ - unsigned last_level = G_008F1C_LAST_LEVEL(desc[3]); - unsigned type = G_008F1C_TYPE(desc[3]); - - if (type == V_008F1C_SQ_RSRC_IMG_2D_MSAA || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { - unsigned log_samples = util_logbase2(MAX2(1, tex->buffer.b.b.nr_storage_samples)); - - if (last_level != log_samples) { - fprintf(stderr, - "radeonsi: invalid MSAA texture import, " - "metadata has log2(samples) = %u, the caller set %u\n", - last_level, log_samples); - return false; - } - } else { - if (last_level != tex->buffer.b.b.last_level) { - fprintf(stderr, - "radeonsi: invalid mipmapped texture import, " - "metadata has last_level = %u, the caller set %u\n", - last_level, tex->buffer.b.b.last_level); - return false; - } - } - - if (sscreen->info.chip_class >= GFX8 && G_008F28_COMPRESSION_EN(desc[6])) { - /* Read DCC information. */ - switch (sscreen->info.chip_class) { - case GFX8: - tex->surface.dcc_offset = (uint64_t)desc[7] << 8; - break; - - case GFX9: - tex->surface.dcc_offset = - ((uint64_t)desc[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc[5]) << 40); - tex->surface.u.gfx9.dcc.pipe_aligned = G_008F24_META_PIPE_ALIGNED(desc[5]); - tex->surface.u.gfx9.dcc.rb_aligned = G_008F24_META_RB_ALIGNED(desc[5]); - - /* If DCC is unaligned, this can only be a displayable image. */ - if (!tex->surface.u.gfx9.dcc.pipe_aligned && !tex->surface.u.gfx9.dcc.rb_aligned) - assert(tex->surface.is_displayable); - break; - - case GFX10: - tex->surface.dcc_offset = - ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16); - tex->surface.u.gfx9.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]); - break; - - default: - assert(0); - return false; - } - } else { - /* Disable DCC. dcc_offset is always set by texture_from_handle - * and must be cleared here. - */ - si_texture_zero_dcc_fields(tex); - } - - return true; + ac_surface_get_umd_metadata(&sscreen->info, &tex->surface, + tex->buffer.b.b.last_level + 1, + desc, &md.size_metadata, md.metadata); + sscreen->ws->buffer_set_metadata(tex->buffer.buf, &md, &tex->surface); } static bool si_has_displayable_dcc(struct si_texture *tex) @@ -791,18 +582,7 @@ static bool si_has_displayable_dcc(struct si_texture *tex) if (sscreen->info.chip_class <= GFX8) return false; - /* This needs a cache flush before scanout. - * (it can't be scanned out and rendered to simultaneously) - */ - if (sscreen->info.use_display_dcc_unaligned && tex->surface.dcc_offset && - !tex->surface.u.gfx9.dcc.pipe_aligned && !tex->surface.u.gfx9.dcc.rb_aligned) - return true; - - /* This needs an explicit flush (flush_resource). */ - if (sscreen->info.use_display_dcc_with_retile_blit && tex->surface.display_dcc_offset) - return true; - - return false; + return tex->surface.is_displayable && tex->surface.dcc_offset; } static bool si_resource_get_param(struct pipe_screen *screen, struct pipe_context *context, @@ -941,9 +721,11 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && (tex->cmask_buffer || tex->surface.dcc_offset)) { /* Eliminate fast clear (both CMASK and DCC) */ - si_eliminate_fast_color_clear(sctx, tex); - /* eliminate_fast_color_clear flushes the context */ - flush = false; + bool flushed; + si_eliminate_fast_color_clear(sctx, tex, &flushed); + /* eliminate_fast_color_clear sometimes flushes the context */ + if (flushed) + flush = false; /* Disable CMASK if flush_resource isn't going * to be called. @@ -1020,13 +802,9 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex static void si_texture_destroy(struct pipe_screen *screen, struct pipe_resource *ptex) { - struct si_screen *sscreen = (struct si_screen *)screen; struct si_texture *tex = (struct si_texture *)ptex; struct si_resource *resource = &tex->buffer; - if (sscreen->info.chip_class >= GFX9) - free(tex->surface.u.gfx9.dcc_retile_map); - si_texture_reference(&tex->flushed_depth_texture, NULL); if (tex->cmask_buffer != &tex->buffer) { @@ -1075,19 +853,16 @@ void si_print_texture_info(struct si_screen *sscreen, struct si_texture *tex, if (tex->cmask_buffer) { u_log_printf(log, " CMask: offset=%" PRIu64 ", size=%u, " - "alignment=%u, rb_aligned=%u, pipe_aligned=%u\n", + "alignment=%u\n", tex->surface.cmask_offset, tex->surface.cmask_size, - tex->surface.cmask_alignment, tex->surface.u.gfx9.cmask.rb_aligned, - tex->surface.u.gfx9.cmask.pipe_aligned); + tex->surface.cmask_alignment); } if (tex->surface.htile_offset) { u_log_printf(log, - " HTile: offset=%" PRIu64 ", size=%u, alignment=%u, " - "rb_aligned=%u, pipe_aligned=%u\n", + " HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n", tex->surface.htile_offset, tex->surface.htile_size, - tex->surface.htile_alignment, tex->surface.u.gfx9.htile.rb_aligned, - tex->surface.u.gfx9.htile.pipe_aligned); + tex->surface.htile_alignment); } if (tex->surface.dcc_offset) { @@ -1197,7 +972,8 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, const struct pipe_resource *base, const struct radeon_surf *surface, const struct si_texture *plane0, - struct pb_buffer *imported_buf, uint64_t offset, + struct pb_buffer *imported_buf, + uint64_t offset, unsigned pitch_in_bytes, uint64_t alloc_size, unsigned alignment) { struct si_texture *tex; @@ -1210,7 +986,6 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, resource = &tex->buffer; resource->b.b = *base; - resource->b.b.next = NULL; resource->b.vtbl = &si_texture_vtbl; pipe_reference_init(&resource->b.b.reference, 1); resource->b.b.screen = screen; @@ -1218,13 +993,20 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, /* don't include stencil-only formats which we don't support for rendering */ tex->is_depth = util_format_has_depth(util_format_description(tex->buffer.b.b.format)); tex->surface = *surface; - tex->tc_compatible_htile = - tex->surface.htile_size != 0 && (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE); + + /* On GFX8, HTILE uses different tiling depending on the TC_COMPATIBLE_HTILE + * setting, so we have to enable it if we enabled it at allocation. + * + * GFX9 and later use the same tiling for both, so TC-compatible HTILE can be + * enabled on demand. + */ + tex->tc_compatible_htile = sscreen->info.chip_class == GFX8 && + tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE; /* TC-compatible HTILE: * - GFX8 only supports Z32_FLOAT. * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */ - if (tex->tc_compatible_htile) { + if (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) { if (sscreen->info.chip_class >= GFX9 && base->format == PIPE_FORMAT_Z16_UNORM) tex->db_render_format = base->format; else { @@ -1245,12 +1027,9 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, */ tex->ps_draw_ratio = 0; - if (sscreen->info.chip_class >= GFX9) { - tex->surface.u.gfx9.surf_offset = offset; - } else { - for (unsigned i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i) - tex->surface.u.legacy.level[i].offset += offset; - } + ac_surface_override_offset_stride(&sscreen->info, &tex->surface, + tex->buffer.b.b.last_level + 1, + offset, pitch_in_bytes / tex->surface.bpe); if (tex->is_depth) { if (sscreen->info.chip_class >= GFX9) { @@ -1259,9 +1038,7 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, /* Stencil texturing with HTILE doesn't work * with mipmapping on Navi10-14. */ - if ((sscreen->info.family == CHIP_NAVI10 || sscreen->info.family == CHIP_NAVI12 || - sscreen->info.family == CHIP_NAVI14) && - base->last_level > 0) + if (sscreen->info.chip_class == GFX10 && base->last_level > 0) tex->htile_stencil_disabled = true; } else { tex->can_sample_z = !tex->surface.u.legacy.depth_adjusted; @@ -1303,6 +1080,8 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, resource->vram_usage = resource->bo_size; else if (resource->domains & RADEON_DOMAIN_GTT) resource->gart_usage = resource->bo_size; + if (sscreen->ws->buffer_get_flags) + resource->flags = sscreen->ws->buffer_get_flags(resource->buf); } if (tex->cmask_buffer) { @@ -1380,20 +1159,14 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, */ bool use_uint16 = tex->surface.u.gfx9.dcc_retile_use_uint16; unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements; + unsigned dcc_retile_map_size = num_elements * (use_uint16 ? 2 : 4); struct si_resource *buf = si_aligned_buffer_create(screen, 0, PIPE_USAGE_STREAM, - num_elements * (use_uint16 ? 2 : 4), + dcc_retile_map_size, sscreen->info.tcc_cache_line_size); - uint32_t *ui = (uint32_t *)sscreen->ws->buffer_map(buf->buf, NULL, PIPE_TRANSFER_WRITE); - uint16_t *us = (uint16_t *)ui; + void *map = sscreen->ws->buffer_map(buf->buf, NULL, PIPE_TRANSFER_WRITE); - /* Upload the retile map into a staging buffer. */ - if (use_uint16) { - for (unsigned i = 0; i < num_elements; i++) - us[i] = tex->surface.u.gfx9.dcc_retile_map[i]; - } else { - for (unsigned i = 0; i < num_elements; i++) - ui[i] = tex->surface.u.gfx9.dcc_retile_map[i]; - } + /* Upload the retile map into the staging buffer. */ + memcpy(map, tex->surface.u.gfx9.dcc_retile_map, dcc_retile_map_size); /* Copy the staging buffer to the buffer backing the texture. */ struct si_context *sctx = (struct si_context *)sscreen->aux_context; @@ -1435,8 +1208,6 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, error: FREE(tex); - if (sscreen->info.chip_class >= GFX9) - free(surface->u.gfx9.dcc_retile_map); return NULL; } @@ -1454,7 +1225,7 @@ static enum radeon_surf_mode si_choose_tiling(struct si_screen *sscreen, return RADEON_SURF_MODE_2D; /* Transfer resources should be linear. */ - if (templ->flags & SI_RESOURCE_FLAG_TRANSFER) + if (templ->flags & SI_RESOURCE_FLAG_FORCE_LINEAR) return RADEON_SURF_MODE_LINEAR_ALIGNED; /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on GFX8, @@ -1510,7 +1281,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen, if (templ->nr_samples >= 2) { /* This is hackish (overwriting the const pipe_resource template), - * but should be harmless and state trackers can also see + * but should be harmless and gallium frontends can also see * the overriden number of samples in the created pipe_resource. */ if (is_zs && sscreen->eqaa_force_z_samples) { @@ -1522,8 +1293,8 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen, } } - bool is_flushed_depth = - templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH || templ->flags & SI_RESOURCE_FLAG_TRANSFER; + bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH || + templ->flags & SI_RESOURCE_FLAG_FORCE_LINEAR; bool tc_compatible_htile = sscreen->info.chip_class >= GFX8 && /* There are issues with TC-compatible HTILE on Tonga (and @@ -1565,7 +1336,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen, if (num_planes > 1) plane_templ[i].bind |= PIPE_BIND_SHARED; - if (si_init_surface(sscreen, &surface[i], &plane_templ[i], tile_mode, 0, false, + if (si_init_surface(sscreen, &surface[i], &plane_templ[i], tile_mode, false, plane_templ[i].bind & PIPE_BIND_SCANOUT, is_flushed_depth, tc_compatible_htile)) return NULL; @@ -1580,7 +1351,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen, for (unsigned i = 0; i < num_planes; i++) { struct si_texture *tex = si_texture_create_object(screen, &plane_templ[i], &surface[i], plane0, NULL, - plane_offset[i], total_size, max_alignment); + plane_offset[i], 0, total_size, max_alignment); if (!tex) { si_texture_reference(&plane0, NULL); return NULL; @@ -1603,14 +1374,12 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen, static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *sscreen, const struct pipe_resource *templ, struct pb_buffer *buf, unsigned stride, - unsigned offset, unsigned usage, + uint64_t offset, unsigned usage, bool dedicated) { - enum radeon_surf_mode array_mode; struct radeon_surf surface = {}; struct radeon_bo_metadata metadata = {}; struct si_texture *tex; - bool is_scanout; int r; /* Ignore metadata for non-zero planes. */ @@ -1618,8 +1387,7 @@ static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *ssc dedicated = false; if (dedicated) { - sscreen->ws->buffer_get_metadata(buf, &metadata); - si_get_display_metadata(sscreen, &surface, &metadata, &array_mode, &is_scanout); + sscreen->ws->buffer_get_metadata(buf, &metadata, &surface); } else { /** * The bo metadata is unset for un-dedicated images. So we fall @@ -1643,16 +1411,16 @@ static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *ssc * tiling information when the TexParameter TEXTURE_TILING_EXT * is set. */ - array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; - is_scanout = false; + metadata.mode = RADEON_SURF_MODE_LINEAR_ALIGNED; } - r = - si_init_surface(sscreen, &surface, templ, array_mode, stride, true, is_scanout, false, false); + r = si_init_surface(sscreen, &surface, templ, metadata.mode, true, + surface.flags & RADEON_SURF_SCANOUT, false, false); if (r) return NULL; - tex = si_texture_create_object(&sscreen->b, templ, &surface, NULL, buf, offset, 0, 0); + tex = si_texture_create_object(&sscreen->b, templ, &surface, NULL, buf, + offset, stride, 0, 0); if (!tex) return NULL; @@ -1660,7 +1428,20 @@ static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *ssc tex->buffer.external_usage = usage; tex->num_planes = 1; - if (!si_read_tex_bo_metadata(sscreen, tex, offset, &metadata)) { + /* Account for multiple planes with lowered yuv import. */ + struct pipe_resource *next_plane = tex->buffer.b.b.next; + while(next_plane) { + struct si_texture *next_tex = (struct si_texture *)next_plane; + ++next_tex->num_planes; + ++tex->num_planes; + next_plane = next_plane->next; + } + + if (!ac_surface_set_umd_metadata(&sscreen->info, &tex->surface, + tex->buffer.b.b.nr_storage_samples, + tex->buffer.b.b.last_level + 1, + metadata.size_metadata, + metadata.metadata)) { si_texture_reference(&tex, NULL); return NULL; } @@ -1765,7 +1546,7 @@ bool si_init_flushed_depth_texture(struct pipe_context *ctx, struct pipe_resourc */ static void si_init_temp_resource_from_box(struct pipe_resource *res, struct pipe_resource *orig, const struct pipe_box *box, unsigned level, - unsigned flags) + unsigned usage, unsigned flags) { memset(res, 0, sizeof(*res)); res->format = orig->format; @@ -1773,10 +1554,10 @@ static void si_init_temp_resource_from_box(struct pipe_resource *res, struct pip res->height0 = box->height; res->depth0 = 1; res->array_size = 1; - res->usage = flags & SI_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT; + res->usage = usage; res->flags = flags; - if (flags & SI_RESOURCE_FLAG_TRANSFER && util_format_is_compressed(orig->format)) { + if (flags & SI_RESOURCE_FLAG_FORCE_LINEAR && util_format_is_compressed(orig->format)) { /* Transfer resources are allocated with linear tiling, which is * not supported for compressed formats. */ @@ -1842,7 +1623,7 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou char *map; bool use_staging_texture = false; - assert(!(texture->flags & SI_RESOURCE_FLAG_TRANSFER)); + assert(!(texture->flags & SI_RESOURCE_FLAG_FORCE_LINEAR)); assert(box->width && box->height && box->depth); if (tex->is_depth) { @@ -1869,7 +1650,7 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou * Use the staging texture for uploads if the underlying BO * is busy. */ - if (!tex->surface.is_linear) + if (!tex->surface.is_linear || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)) use_staging_texture = true; else if (usage & PIPE_TRANSFER_READ) use_staging_texture = @@ -1896,9 +1677,24 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou if (use_staging_texture) { struct pipe_resource resource; struct si_texture *staging; + unsigned bo_usage = usage & PIPE_TRANSFER_READ ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM; + unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR; - si_init_temp_resource_from_box(&resource, texture, box, level, SI_RESOURCE_FLAG_TRANSFER); - resource.usage = (usage & PIPE_TRANSFER_READ) ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM; + /* The pixel shader has a bad access pattern for linear textures. + * If a pixel shader is used to blit to/from staging, don't disable caches. + * + * MSAA, depth/stencil textures, and compressed textures use the pixel shader + * to blit. + */ + if (texture->nr_samples <= 1 && + !tex->is_depth && + !util_format_is_compressed(texture->format) && + /* Texture uploads with DCC use the pixel shader to blit */ + (!(usage & PIPE_TRANSFER_WRITE) || !vi_dcc_enabled(tex, level))) + bo_flags |= SI_RESOURCE_FLAG_UNCACHED; + + si_init_temp_resource_from_box(&resource, texture, box, level, bo_usage, + bo_flags); /* Since depth-stencil textures don't support linear tiling, * blit from ZS to color and vice versa. u_blitter will do