From 7cfb364b1a373bce6b0b273556953fbb78c139e9 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 9 Nov 2016 17:16:36 -0500 Subject: [PATCH] swr: rework resource layout and surface setup This is a bit of a mega-commit, but unfortunately there's no great way to break this up since a lot of different pieces have to match up. Here we do the following: - change surface layout to match swr's Load/StoreTile expectations - fix sampler settings to respect all sampler view parameters - fix stencil sampling to read from secondary resource - respect pipe surface format, level, and layer settings - fix resource map/unmap based on the new layout logic - fix resource map/unmap to copy proper parts of stencil values in and out of the matching depth texture These fix a massive quantity of piglits, including all the tex-miplevel-selection ones. Note that the swr native miptree layout isn't extremely space-efficient, and we end up using it for all textures, not just the renderable ones. A back-of-the-envelope calculation suggests about 10%-25% increased memory usage for miptrees, depending on the number of LODs. Single-LOD textures should be unaffected. There are a handful of regressions as a result of this change: - Some textureGrad tests, these failures match llvmpipe. (There are debug settings allowing improved gallivm sampling accuracy.) - Some layered clearing tests as swr doesn't currently support that. It was getting lucky before because enough other things were broken. 
Signed-off-by: Ilia Mirkin Reviewed-by: Bruce Cherniak --- src/gallium/drivers/swr/swr_context.cpp | 103 +++++++++--- src/gallium/drivers/swr/swr_draw.cpp | 4 +- src/gallium/drivers/swr/swr_resource.h | 8 +- src/gallium/drivers/swr/swr_screen.cpp | 203 +++++++++++++++++------- src/gallium/drivers/swr/swr_shader.cpp | 28 +++- src/gallium/drivers/swr/swr_state.cpp | 166 +++++++++++-------- 6 files changed, 352 insertions(+), 160 deletions(-) diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index 3f5771219f6..5a1927cb349 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -139,21 +139,35 @@ swr_transfer_map(struct pipe_context *pipe, if (!pt) return NULL; pipe_resource_reference(&pt->resource, resource); + pt->usage = (pipe_transfer_usage)usage; pt->level = level; pt->box = *box; - pt->stride = spr->row_stride[level]; - pt->layer_stride = spr->img_stride[level]; - - /* if we're mapping the depth/stencil, copy in stencil */ - if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT - && spr->has_stencil) { - for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) { - spr->swr.pBaseAddress[4 * i + 3] = spr->secondary.pBaseAddress[i]; - } - } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT - && spr->has_stencil) { - for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) { - spr->swr.pBaseAddress[8 * i + 4] = spr->secondary.pBaseAddress[i]; + pt->stride = spr->swr.pitch; + pt->layer_stride = spr->swr.qpitch * spr->swr.pitch; + + /* if we're mapping the depth/stencil, copy in stencil for the section + * being read in + */ + if (usage & PIPE_TRANSFER_READ && spr->has_depth && spr->has_stencil) { + size_t zbase, sbase; + for (int z = box->z; z < box->z + box->depth; z++) { + zbase = (z * spr->swr.qpitch + box->y) * spr->swr.pitch + + spr->mip_offsets[level]; + sbase = (z * spr->secondary.qpitch + box->y) * spr->secondary.pitch + + 
spr->secondary_mip_offsets[level]; + for (int y = box->y; y < box->y + box->height; y++) { + if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { + for (int x = box->x; x < box->x + box->width; x++) + spr->swr.pBaseAddress[zbase + 4 * x + 3] = + spr->secondary.pBaseAddress[sbase + x]; + } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { + for (int x = box->x; x < box->x + box->width; x++) + spr->swr.pBaseAddress[zbase + 8 * x + 4] = + spr->secondary.pBaseAddress[sbase + x]; + } + zbase += spr->swr.pitch; + sbase += spr->secondary.pitch; + } } } @@ -167,23 +181,60 @@ swr_transfer_map(struct pipe_context *pipe, } static void -swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) +swr_transfer_flush_region(struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *flush_box) { assert(transfer->resource); + assert(transfer->usage & PIPE_TRANSFER_WRITE); - struct swr_resource *res = swr_resource(transfer->resource); - /* if we're mapping the depth/stencil, copy out stencil */ - if (res->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT - && res->has_stencil) { - for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) { - res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[4 * i + 3]; - } - } else if (res->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT - && res->has_stencil) { - for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) { - res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[8 * i + 4]; + struct swr_resource *spr = swr_resource(transfer->resource); + if (!spr->has_depth || !spr->has_stencil) + return; + + size_t zbase, sbase; + struct pipe_box box = *flush_box; + box.x += transfer->box.x; + box.y += transfer->box.y; + box.z += transfer->box.z; + for (int z = box.z; z < box.z + box.depth; z++) { + zbase = (z * spr->swr.qpitch + box.y) * spr->swr.pitch + + spr->mip_offsets[transfer->level]; + sbase = (z * spr->secondary.qpitch + box.y) * spr->secondary.pitch + 
+ spr->secondary_mip_offsets[transfer->level]; + for (int y = box.y; y < box.y + box.height; y++) { + if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { + for (int x = box.x; x < box.x + box.width; x++) + spr->secondary.pBaseAddress[sbase + x] = + spr->swr.pBaseAddress[zbase + 4 * x + 3]; + } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { + for (int x = box.x; x < box.x + box.width; x++) + spr->secondary.pBaseAddress[sbase + x] = + spr->swr.pBaseAddress[zbase + 8 * x + 4]; + } + zbase += spr->swr.pitch; + sbase += spr->secondary.pitch; } } +} + +static void +swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) +{ + assert(transfer->resource); + + struct swr_resource *spr = swr_resource(transfer->resource); + /* if we're mapping the depth/stencil, copy in stencil for the section + * being written out + */ + if (transfer->usage & PIPE_TRANSFER_WRITE && + !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && + spr->has_depth && spr->has_stencil) { + struct pipe_box box; + u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, + transfer->box.depth, &box); + swr_transfer_flush_region(pipe, transfer, &box); + } pipe_resource_reference(&transfer->resource, NULL); FREE(transfer); @@ -424,8 +475,8 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) ctx->pipe.surface_destroy = swr_surface_destroy; ctx->pipe.transfer_map = swr_transfer_map; ctx->pipe.transfer_unmap = swr_transfer_unmap; + ctx->pipe.transfer_flush_region = swr_transfer_flush_region; - ctx->pipe.transfer_flush_region = u_default_transfer_flush_region; ctx->pipe.buffer_subdata = u_default_buffer_subdata; ctx->pipe.texture_subdata = u_default_texture_subdata; diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp index 920ca9f582c..e8c5b23f11d 100644 --- a/src/gallium/drivers/swr/swr_draw.cpp +++ b/src/gallium/drivers/swr/swr_draw.cpp @@ -281,7 +281,9 @@ swr_store_dirty_resource(struct pipe_context 
*pipe, swr_draw_context *pDC = &ctx->swrDC; SWR_SURFACE_STATE *renderTargets = pDC->renderTargets; for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++) - if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress) { + if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress || + (spr->secondary.pBaseAddress && + renderTargets[i].pBaseAddress == spr->secondary.pBaseAddress)) { swr_store_render_target(pipe, i, post_tile_state); /* Mesa thinks depth/stencil are fused, so we'll never get an diff --git a/src/gallium/drivers/swr/swr_resource.h b/src/gallium/drivers/swr/swr_resource.h index 00001e94f79..41abd77cba0 100644 --- a/src/gallium/drivers/swr/swr_resource.h +++ b/src/gallium/drivers/swr/swr_resource.h @@ -41,17 +41,13 @@ struct swr_resource { bool has_depth; bool has_stencil; - UINT alignedWidth; - UINT alignedHeight; - SWR_SURFACE_STATE swr; SWR_SURFACE_STATE secondary; /* for faking depth/stencil merged formats */ struct sw_displaytarget *display_target; - unsigned row_stride[PIPE_MAX_TEXTURE_LEVELS]; - unsigned img_stride[PIPE_MAX_TEXTURE_LEVELS]; - unsigned mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; + size_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; + size_t secondary_mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; enum swr_resource_status status; }; diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index 9723c40adf2..c558e74776e 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -44,6 +44,8 @@ extern "C" { #include "jit_api.h" +#include "memory/TilingFunctions.h" + #include #include @@ -722,12 +724,14 @@ swr_displaytarget_layout(struct swr_screen *screen, struct swr_resource *res) struct sw_winsys *winsys = screen->winsys; struct sw_displaytarget *dt; + const unsigned width = align(res->swr.width, res->swr.halign); + const unsigned height = align(res->swr.height, res->swr.valign); + UINT stride; dt = winsys->displaytarget_create(winsys, res->base.bind, res->base.format, - 
res->alignedWidth, - res->alignedHeight, + width, height, 64, NULL, &stride); @@ -741,14 +745,14 @@ swr_displaytarget_layout(struct swr_screen *screen, struct swr_resource *res) /* Clear the display target surface */ if (map) - memset(map, 0, res->alignedHeight * stride); + memset(map, 0, height * stride); winsys->displaytarget_unmap(winsys, dt); return TRUE; } -static boolean +static bool swr_texture_layout(struct swr_screen *screen, struct swr_resource *res, boolean allocate) @@ -764,87 +768,164 @@ swr_texture_layout(struct swr_screen *screen, if (res->has_stencil && !res->has_depth) fmt = PIPE_FORMAT_R8_UINT; + /* We always use the SWR layout. For 2D and 3D textures this looks like: + * + * |<------- pitch ------->| + * +=======================+------- + * |Array 0 | ^ + * | | | + * | Level 0 | | + * | | | + * | | qpitch + * +-----------+-----------+ | + * | | L2L2L2L2 | | + * | Level 1 | L3L3 | | + * | | L4 | v + * +===========+===========+------- + * |Array 1 | + * | | + * | Level 0 | + * | | + * | | + * +-----------+-----------+ + * | | L2L2L2L2 | + * | Level 1 | L3L3 | + * | | L4 | + * +===========+===========+ + * + * The overall width in bytes is known as the pitch, while the overall + * height in rows is the qpitch. Array slices are laid out logically below + * one another, qpitch rows apart. For 3D surfaces, the "level" values are + * just invalid for the higher array numbers (since depth is also + * minified). 1D and 1D array surfaces are stored effectively the same way, + * except that pitch never plays into it. All the levels are logically + * adjacent to each other on the X axis. The qpitch becomes the number of + * elements between array slices, while the pitch is unused. + * + * Each level's sizes are subject to the valign and halign settings of the + * surface. For compressed formats that swr is unaware of, we will use an + * appropriately-sized uncompressed format, and scale the widths/heights. + * + * This surface is stored inside res->swr. 
For depth/stencil textures, + * res->secondary will have an identically-laid-out but R8_UINT-formatted + * stencil tree. In the Z32F_S8 case, the primary surface still has 64-bpp + * texels, to simplify map/unmap logic which copies the stencil values + * in/out. + */ + res->swr.width = pt->width0; res->swr.height = pt->height0; - res->swr.depth = pt->depth0; res->swr.type = swr_convert_target_type(pt->target); res->swr.tileMode = SWR_TILE_NONE; res->swr.format = mesa_to_swr_format(fmt); - res->swr.numSamples = (1 << pt->nr_samples); + res->swr.numSamples = std::max(1u, pt->nr_samples); - SWR_FORMAT_INFO finfo = GetFormatInfo(res->swr.format); - - size_t total_size = 0; - unsigned width = pt->width0; - unsigned height = pt->height0; - unsigned depth = pt->depth0; - unsigned layers = pt->array_size; - - for (int level = 0; level <= pt->last_level; level++) { - unsigned alignedWidth, alignedHeight; - unsigned num_slices; + if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) { + res->swr.halign = KNOB_MACROTILE_X_DIM; + res->swr.valign = KNOB_MACROTILE_Y_DIM; + } else { + res->swr.halign = 1; + res->swr.valign = 1; + } - if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) { - alignedWidth = align(width, KNOB_MACROTILE_X_DIM); - alignedHeight = align(height, KNOB_MACROTILE_Y_DIM); - } else { - alignedWidth = width; - alignedHeight = height; + unsigned halign = res->swr.halign * util_format_get_blockwidth(fmt); + unsigned width = align(pt->width0, halign); + if (pt->target == PIPE_TEXTURE_1D || pt->target == PIPE_TEXTURE_1D_ARRAY) { + for (int level = 1; level <= pt->last_level; level++) + width += align(u_minify(pt->width0, level), halign); + res->swr.pitch = util_format_get_blocksize(fmt); + res->swr.qpitch = util_format_get_nblocksx(fmt, width); + } else { + // The pitch is the overall width of the texture in bytes. Most of the + // time this is the pitch of level 0 since all the other levels fit + // underneath it. 
However in some degenerate situations, the width of + // level1 + level2 may be larger. In that case, we use those + // widths. This can happen if, e.g. halign is 32, and the width of level + // 0 is 32 or less. In that case, the aligned levels 1 and 2 will also + // be 32 each, adding up to 64. + unsigned valign = res->swr.valign * util_format_get_blockheight(fmt); + if (pt->last_level > 1) { + width = std::max( + width, + align(u_minify(pt->width0, 1), halign) + + align(u_minify(pt->width0, 2), halign)); } - - if (level == 0) { - res->alignedWidth = alignedWidth; - res->alignedHeight = alignedHeight; + res->swr.pitch = util_format_get_stride(fmt, width); + + // The qpitch is controlled by either the height of the second LOD, or + // the combination of all the later LODs. + unsigned height = align(pt->height0, valign); + if (pt->last_level == 1) { + height += align(u_minify(pt->height0, 1), valign); + } else if (pt->last_level > 1) { + unsigned level1 = align(u_minify(pt->height0, 1), valign); + unsigned level2 = 0; + for (int level = 2; level <= pt->last_level; level++) { + level2 += align(u_minify(pt->height0, level), valign); + } + height += std::max(level1, level2); } + res->swr.qpitch = util_format_get_nblocksy(fmt, height); + } - res->row_stride[level] = util_format_get_stride(fmt, alignedWidth); - res->img_stride[level] = - res->row_stride[level] * util_format_get_nblocksy(fmt, alignedHeight); - res->mip_offsets[level] = total_size; - - if (pt->target == PIPE_TEXTURE_3D) - num_slices = depth; - else if (pt->target == PIPE_TEXTURE_1D_ARRAY - || pt->target == PIPE_TEXTURE_2D_ARRAY - || pt->target == PIPE_TEXTURE_CUBE - || pt->target == PIPE_TEXTURE_CUBE_ARRAY) - num_slices = layers; - else - num_slices = 1; - - total_size += res->img_stride[level] * num_slices; - if (total_size > SWR_MAX_TEXTURE_SIZE) - return FALSE; + if (pt->target == PIPE_TEXTURE_3D) + res->swr.depth = pt->depth0; + else + res->swr.depth = pt->array_size; + + // Fix up swr format if 
necessary so that LOD offset computation works + if (res->swr.format == (SWR_FORMAT)-1) { + switch (util_format_get_blocksize(fmt)) { + default: + unreachable("Unexpected format block size"); + case 1: res->swr.format = R8_UINT; break; + case 2: res->swr.format = R16_UINT; break; + case 4: res->swr.format = R32_UINT; break; + case 8: + if (util_format_is_compressed(fmt)) + res->swr.format = BC4_UNORM; + else + res->swr.format = R32G32_UINT; + break; + case 16: + if (util_format_is_compressed(fmt)) + res->swr.format = BC5_UNORM; + else + res->swr.format = R32G32B32A32_UINT; + break; + } + } - width = u_minify(width, 1); - height = u_minify(height, 1); - depth = u_minify(depth, 1); + for (int level = 0; level <= pt->last_level; level++) { + res->mip_offsets[level] = + ComputeSurfaceOffset(0, 0, 0, 0, 0, level, &res->swr); } - res->swr.halign = res->alignedWidth; - res->swr.valign = res->alignedHeight; - res->swr.pitch = res->row_stride[0]; + size_t total_size = + (size_t)res->swr.depth * res->swr.qpitch * res->swr.pitch; + if (total_size > SWR_MAX_TEXTURE_SIZE) + return false; if (allocate) { res->swr.pBaseAddress = (uint8_t *)AlignedMalloc(total_size, 64); if (res->has_depth && res->has_stencil) { - SWR_FORMAT_INFO finfo = GetFormatInfo(res->secondary.format); - res->secondary.width = pt->width0; - res->secondary.height = pt->height0; - res->secondary.depth = pt->depth0; - res->secondary.type = SURFACE_2D; - res->secondary.tileMode = SWR_TILE_NONE; + res->secondary = res->swr; res->secondary.format = R8_UINT; - res->secondary.numSamples = (1 << pt->nr_samples); - res->secondary.pitch = res->alignedWidth * finfo.Bpp; + res->secondary.pitch = res->swr.pitch / util_format_get_blocksize(fmt); + + for (int level = 0; level <= pt->last_level; level++) { + res->secondary_mip_offsets[level] = + ComputeSurfaceOffset(0, 0, 0, 0, 0, level, &res->secondary); + } res->secondary.pBaseAddress = (uint8_t *)AlignedMalloc( - res->alignedHeight * res->secondary.pitch, 64); + 
res->secondary.depth * res->secondary.qpitch * + res->secondary.pitch, 64); } } - return TRUE; + return true; } static boolean diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp index 04637e506f8..e4f9796ef19 100644 --- a/src/gallium/drivers/swr/swr_shader.cpp +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -34,6 +34,7 @@ #include "builder.h" #include "tgsi/tgsi_strings.h" +#include "util/u_format.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_struct.h" @@ -41,6 +42,7 @@ #include "swr_context.h" #include "swr_context_llvm.h" +#include "swr_resource.h" #include "swr_state.h" #include "swr_screen.h" @@ -85,18 +87,36 @@ swr_generate_sampler_key(const struct lp_tgsi_info &info, info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; for (unsigned i = 0; i < key.nr_sampler_views; i++) { if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) { + const struct pipe_sampler_view *view = + ctx->sampler_views[shader_type][i]; lp_sampler_static_texture_state( - &key.sampler[i].texture_state, - ctx->sampler_views[shader_type][i]); + &key.sampler[i].texture_state, view); + if (view) { + struct swr_resource *swr_res = swr_resource(view->texture); + const struct util_format_description *desc = + util_format_description(view->format); + if (swr_res->has_depth && swr_res->has_stencil && + !util_format_has_depth(desc)) + key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT; + } } } } else { key.nr_sampler_views = key.nr_samplers; for (unsigned i = 0; i < key.nr_sampler_views; i++) { if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + const struct pipe_sampler_view *view = + ctx->sampler_views[shader_type][i]; lp_sampler_static_texture_state( - &key.sampler[i].texture_state, - ctx->sampler_views[shader_type][i]); + &key.sampler[i].texture_state, view); + if (view) { + struct swr_resource *swr_res = swr_resource(view->texture); + const struct util_format_description *desc = + 
util_format_description(view->format); + if (swr_res->has_depth && swr_res->has_stencil && + !util_format_has_depth(desc)) + key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT; + } } } } diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index c4d7f0319ef..b0cbc21964e 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -717,25 +717,46 @@ swr_update_texture_state(struct swr_context *ctx, for (unsigned i = 0; i < num_sampler_views; i++) { struct pipe_sampler_view *view = ctx->sampler_views[shader_type][i]; + struct swr_jit_texture *jit_tex = &textures[i]; + memset(jit_tex, 0, sizeof(*jit_tex)); if (view) { struct pipe_resource *res = view->texture; struct swr_resource *swr_res = swr_resource(res); - struct swr_jit_texture *jit_tex = &textures[i]; - memset(jit_tex, 0, sizeof(*jit_tex)); + SWR_SURFACE_STATE *swr = &swr_res->swr; + size_t *mip_offsets = swr_res->mip_offsets; + if (swr_res->has_depth && swr_res->has_stencil && + !util_format_has_depth(util_format_description(view->format))) { + swr = &swr_res->secondary; + mip_offsets = swr_res->secondary_mip_offsets; + } + jit_tex->width = res->width0; jit_tex->height = res->height0; - jit_tex->depth = res->depth0; - jit_tex->first_level = view->u.tex.first_level; - jit_tex->last_level = view->u.tex.last_level; - jit_tex->base_ptr = swr_res->swr.pBaseAddress; + jit_tex->base_ptr = swr->pBaseAddress; + if (view->target != PIPE_BUFFER) { + jit_tex->first_level = view->u.tex.first_level; + jit_tex->last_level = view->u.tex.last_level; + if (view->target == PIPE_TEXTURE_3D) + jit_tex->depth = res->depth0; + else + jit_tex->depth = + view->u.tex.last_layer - view->u.tex.first_layer + 1; + jit_tex->base_ptr += view->u.tex.first_layer * + swr->qpitch * swr->pitch; + } else { + unsigned view_blocksize = util_format_get_blocksize(view->format); + jit_tex->base_ptr += view->u.buf.offset; + jit_tex->width = view->u.buf.size / 
view_blocksize; + jit_tex->depth = 1; + } for (unsigned level = jit_tex->first_level; level <= jit_tex->last_level; level++) { - jit_tex->row_stride[level] = swr_res->row_stride[level]; - jit_tex->img_stride[level] = swr_res->img_stride[level]; - jit_tex->mip_offsets[level] = swr_res->mip_offsets[level]; + jit_tex->row_stride[level] = swr->pitch; + jit_tex->img_stride[level] = swr->qpitch * swr->pitch; + jit_tex->mip_offsets[level] = mip_offsets[level]; } } } @@ -805,6 +826,61 @@ swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType) } } +static bool +swr_change_rt(struct swr_context *ctx, + unsigned attachment, + const struct pipe_surface *sf) +{ + swr_draw_context *pDC = &ctx->swrDC; + struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment]; + + /* Do nothing if the render target hasn't changed */ + if ((!sf || !sf->texture) && rt->pBaseAddress == nullptr) + return false; + + /* Deal with disabling RT up front */ + if (!sf || !sf->texture) { + /* If detaching attachment, mark tiles as RESOLVED so core + * won't try to load from non-existent target. */ + swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_RESOLVED); + *rt = {0}; + return true; + } + + const struct swr_resource *swr = swr_resource(sf->texture); + const SWR_SURFACE_STATE *swr_surface = &swr->swr; + SWR_FORMAT fmt = mesa_to_swr_format(sf->format); + + if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.pBaseAddress) { + swr_surface = &swr->secondary; + fmt = swr_surface->format; + } + + if (rt->pBaseAddress == swr_surface->pBaseAddress && + rt->format == fmt && + rt->lod == sf->u.tex.level && + rt->arrayIndex == sf->u.tex.first_layer) + return false; + + bool need_fence = false; + + /* StoreTile for changed target */ + if (rt->pBaseAddress) { + /* If changing attachment to a new target, mark tiles as + * INVALID so they are reloaded from surface. 
*/ + swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID); + need_fence = true; + } + + /* Make new attachment */ + *rt = *swr_surface; + rt->format = fmt; + rt->lod = sf->u.tex.level; + rt->arrayIndex = sf->u.tex.first_layer; + + return need_fence; +} + void swr_update_derived(struct pipe_context *pipe, const struct pipe_draw_info *p_draw_info) @@ -823,64 +899,30 @@ swr_update_derived(struct pipe_context *pipe, /* Render Targets */ if (ctx->dirty & SWR_NEW_FRAMEBUFFER) { struct pipe_framebuffer_state *fb = &ctx->framebuffer; - SWR_SURFACE_STATE *new_attachment[SWR_NUM_ATTACHMENTS] = {0}; - UINT i; + const struct util_format_description *desc = NULL; + bool need_fence = false; /* colorbuffer targets */ - if (fb->nr_cbufs) - for (i = 0; i < fb->nr_cbufs; ++i) - if (fb->cbufs[i]) { - struct swr_resource *colorBuffer = - swr_resource(fb->cbufs[i]->texture); - new_attachment[SWR_ATTACHMENT_COLOR0 + i] = &colorBuffer->swr; - } - - /* depth/stencil target */ - if (fb->zsbuf) { - struct swr_resource *depthStencilBuffer = - swr_resource(fb->zsbuf->texture); - if (depthStencilBuffer->has_depth) { - new_attachment[SWR_ATTACHMENT_DEPTH] = &depthStencilBuffer->swr; - - if (depthStencilBuffer->has_stencil) - new_attachment[SWR_ATTACHMENT_STENCIL] = - &depthStencilBuffer->secondary; - - } else if (depthStencilBuffer->has_stencil) - new_attachment[SWR_ATTACHMENT_STENCIL] = &depthStencilBuffer->swr; + if (fb->nr_cbufs) { + for (unsigned i = 0; i < fb->nr_cbufs; ++i) + need_fence |= swr_change_rt( + ctx, SWR_ATTACHMENT_COLOR0 + i, fb->cbufs[i]); } + for (unsigned i = fb->nr_cbufs; i < SWR_NUM_RENDERTARGETS; ++i) + need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_COLOR0 + i, NULL); - /* Make the attachment updates */ - swr_draw_context *pDC = &ctx->swrDC; - SWR_SURFACE_STATE *renderTargets = pDC->renderTargets; - unsigned need_fence = FALSE; - for (i = 0; i < SWR_NUM_ATTACHMENTS; i++) { - void *new_base = nullptr; - if (new_attachment[i]) - new_base = 
new_attachment[i]->pBaseAddress; - - /* StoreTile for changed target */ - if (renderTargets[i].pBaseAddress != new_base) { - if (renderTargets[i].pBaseAddress) { - /* If changing attachment to a new target, mark tiles as - * INVALID so they are reloaded from surface. - * If detaching attachment, mark tiles as RESOLVED so core - * won't try to load from non-existent target. */ - enum SWR_TILE_STATE post_state = (new_attachment[i] - ? SWR_TILE_INVALID : SWR_TILE_RESOLVED); - swr_store_render_target(pipe, i, post_state); - - need_fence |= TRUE; - } + /* depth/stencil target */ + if (fb->zsbuf) + desc = util_format_description(fb->zsbuf->format); + if (fb->zsbuf && util_format_has_depth(desc)) + need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, fb->zsbuf); + else + need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, NULL); - /* Make new attachment */ - if (new_attachment[i]) - renderTargets[i] = *new_attachment[i]; - else - if (renderTargets[i].pBaseAddress) - renderTargets[i] = {0}; - } - } + if (fb->zsbuf && util_format_has_stencil(desc)) + need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, fb->zsbuf); + else + need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, NULL); /* This fence ensures any attachment changes are resolved before the * next draw */ -- 2.30.2