From: Bruce Cherniak Date: Fri, 5 May 2017 00:33:36 +0000 (-0500) Subject: swr: move msaa resolve to generalized StoreTile X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f52e63069a3fad23e03d42306a42bd20f0159da3;p=mesa.git swr: move msaa resolve to generalized StoreTile v3: list piglit tests fixed by this patch. Fixed typo Tim pointed out. v2: Reword commit message to more closely adhere to community guidelines. This patch moves msaa resolve down into core/StoreTiles where the surface format conversion routines are available. The previous "experimental" resolve was limited to 8-bit unsigned render targets. This fixes a number of piglit msaa tests by adding resolve support for all the render target formats we support. Specifically: layered-rendering/gl-layer-render: fail->pass layered-rendering/gl-layer-render-storage: fail->pass multisample-formats *[2,4,8,16] gl_arb_texture_rg: crash->pass multisample-formats *[2,4,8,16] gl_ext_texture_snorm: crash->pass multisample-formats *[2,4,8,16] gl_arb_texture_float: fail->pass multisample-formats *[2,4,8,16] gl_arb_texture_rg-float: fail->pass MSAA is still disabled by default, but can be enabled with "export SWR_MSAA_MAX_COUNT=4" (1,2,4,8,16 are options) The default is 0, which is disabled. This patch improves the number of multisample-formats supported by swr, and fixes several crashes currently in the 17.1 branch. Therefore, it should be considered for inclusion in the 17.1 stable release. Being disabled by default, it poses no risk to most users of swr. Reviewed-by: Tim Rowley cc: mesa-stable@lists.freedesktop.org --- diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h index ffde574c035..12a5f3d8ce1 100644 --- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h +++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h @@ -1133,6 +1133,64 @@ struct StoreRasterTile } } } + + ////////////////////////////////////////////////////////////////////////// + /// @brief Resolves an 8x8 raster tile to the resolve destination surface. + /// @param pSrc - Pointer to raster tile. + /// @param pDstSurface - Destination surface state + /// @param x, y - Coordinates to raster tile. + /// @param sampleOffset - Offset between adjacent multisamples + INLINE static void Resolve( + uint8_t *pSrc, + SWR_SURFACE_STATE* pDstSurface, + uint32_t x, uint32_t y, uint32_t sampleOffset, uint32_t renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile. + { + uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U); + uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U); + + float oneOverNumSamples = 1.0f / pDstSurface->numSamples; + + // For each raster tile pixel (rx, ry) + for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry) + { + for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx) + { + // Perform bounds checking. + if (((x + rx) < lodWidth) && + ((y + ry) < lodHeight)) + { + // Sum across samples + float resolveColor[4] = {0}; + for (uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++) + { + float sampleColor[4] = {0}; + uint8_t *pSampleSrc = pSrc + sampleOffset * sampleNum; + GetSwizzledSrcColor(pSampleSrc, rx, ry, sampleColor); + resolveColor[0] += sampleColor[0]; + resolveColor[1] += sampleColor[1]; + resolveColor[2] += sampleColor[2]; + resolveColor[3] += sampleColor[3]; + } + + // Divide by numSamples to average + resolveColor[0] *= oneOverNumSamples; + resolveColor[1] *= oneOverNumSamples; + resolveColor[2] *= oneOverNumSamples; + resolveColor[3] *= oneOverNumSamples; + + // Use the resolve surface state + SWR_SURFACE_STATE* pResolveSurface = (SWR_SURFACE_STATE*)pDstSurface->pAuxBaseAddress; + uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress((x + rx), (y + ry), + pResolveSurface->arrayIndex + renderTargetArrayIndex, pResolveSurface->arrayIndex + renderTargetArrayIndex, + 0, pResolveSurface->lod, pResolveSurface); + { + ConvertPixelFromFloat(pDst, resolveColor); + } + } + } + } + } + }; template @@ -2316,6 +2374,9 @@ struct StoreMacroTile pfnStore[sampleNum] = (bForceGeneric || KNOB_USE_GENERIC_STORETILE) ? StoreRasterTile::Store : OptStoreRasterTile::Store; } + // Save original for pSrcHotTile resolve. + uint8_t *pResolveSrcHotTile = pSrcHotTile; + // Store each raster tile from the hot tile to the destination surface. for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM) { @@ -2328,6 +2389,20 @@ struct StoreMacroTile } } } + + if (pDstSurface->pAuxBaseAddress) + { + uint32_t sampleOffset = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits::bpp / 8); + // Store each raster tile from the hot tile to the destination surface. + for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM) + { + for(uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM) + { + StoreRasterTile::Resolve(pResolveSrcHotTile, pDstSurface, (x + col), (y + row), sampleOffset, renderTargetArrayIndex); + pResolveSrcHotTile += sampleOffset * pDstSurface->numSamples; + } + } + } } }; diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index e8e465196db..11ce3464d44 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -267,65 +267,6 @@ swr_resource_copy(struct pipe_context *pipe, } -/* XXX: This resolve is incomplete and suboptimal. It will be removed once the - * pipelined resolve blit works. */ -void -swr_do_msaa_resolve(struct pipe_resource *src_resource, - struct pipe_resource *dst_resource) -{ - /* This is a pretty dumb inline resolve. It only supports 8-bit formats - * (ex RGBA8/BGRA8) - which are most common display formats anyway. - */ - - /* quick check for 8-bit and number of components */ - uint8_t bits_per_component = - util_format_get_component_bits(src_resource->format, - UTIL_FORMAT_COLORSPACE_RGB, 0); - - /* Unsupported resolve format */ - assert(src_resource->format == dst_resource->format); - assert(bits_per_component == 8); - if ((src_resource->format != dst_resource->format) || - (bits_per_component != 8)) { - return; - } - - uint8_t src_num_comps = util_format_get_nr_components(src_resource->format); - - SWR_SURFACE_STATE *src_surface = &swr_resource(src_resource)->swr; - SWR_SURFACE_STATE *dst_surface = &swr_resource(dst_resource)->swr; - - uint32_t *src, *dst, offset; - uint32_t num_samples = src_surface->numSamples; - float recip_num_samples = 1.0f / num_samples; - for (uint32_t y = 0; y < src_surface->height; y++) { - for (uint32_t x = 0; x < src_surface->width; x++) { - float r = 0.0f; - float g = 0.0f; - float b = 0.0f; - float a = 0.0f; - for (uint32_t sampleNum = 0; sampleNum < num_samples; sampleNum++) { - offset = ComputeSurfaceOffset(x, y, 0, 0, sampleNum, 0, src_surface); - src = (uint32_t *) src_surface->pBaseAddress + offset/src_num_comps; - const uint32_t sample = *src; - r += (float)((sample >> 24) & 0xff) / 255.0f * recip_num_samples; - g += (float)((sample >> 16) & 0xff) / 255.0f * recip_num_samples; - b += (float)((sample >> 8) & 0xff) / 255.0f * recip_num_samples; - a += (float)((sample ) & 0xff) / 255.0f * recip_num_samples; - } - uint32_t result = 0; - result = ((uint8_t)(r * 255.0f) & 0xff) << 24; - result |= ((uint8_t)(g * 255.0f) & 0xff) << 16; - result |= ((uint8_t)(b * 255.0f) & 0xff) << 8; - result |= ((uint8_t)(a * 255.0f) & 0xff); - offset = ComputeSurfaceOffset(x, y, 0, 0, 0, 0, src_surface); - dst = (uint32_t *) dst_surface->pBaseAddress + offset/src_num_comps; - *dst = result; - } - } -} - - static void swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info) { @@ -342,28 +283,14 @@ swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info) debug_printf("swr_blit: color resolve : %d -> %d\n", info.src.resource->nr_samples, info.dst.resource->nr_samples); - /* Because the resolve is being done inline (not pipelined), - * resources need to be stored out of hottiles and the pipeline empty. - * - * Resources are marked unused following fence finish because all - * pipeline operations are complete. Validation of the blit will mark - * them are read/write again. - */ + /* Resolve is done as part of the surface store. */ swr_store_dirty_resource(pipe, info.src.resource, SWR_TILE_RESOLVED); - swr_store_dirty_resource(pipe, info.dst.resource, SWR_TILE_RESOLVED); - swr_fence_finish(pipe->screen, NULL, swr_screen(pipe->screen)->flush_fence, 0); - swr_resource_unused(info.src.resource); - swr_resource_unused(info.dst.resource); struct pipe_resource *src_resource = info.src.resource; struct pipe_resource *resolve_target = swr_resource(src_resource)->resolve_target; - /* Inline resolve samples into resolve target resource, then continue - * the blit. */ - swr_do_msaa_resolve(src_resource, resolve_target); - - /* The resolve target becomes the new source for the blit. */ + /* The resolve target becomes the new source for the blit. */ info.src.resource = resolve_target; } diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index 04e613745a9..9254e735085 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -891,6 +891,10 @@ swr_create_resolve_resource(struct pipe_screen *_screen, /* Attach it to the multisample resource */ msaa_res->resolve_target = alt; + + /* Hang resolve surface state off the multisample surface state to so + * StoreTiles knows where to resolve the surface. */ + msaa_res->swr.pAuxBaseAddress = (uint8_t *)&swr_resource(alt)->swr; } return true; /* success */ @@ -1009,14 +1013,10 @@ swr_flush_frontbuffer(struct pipe_screen *p_screen, SwrEndFrame(swr_context(pipe)->swrContext); } - /* Multisample surfaces need to be resolved before present */ + /* Multisample resolved into resolve_target at flush with store_resource */ if (pipe && spr->swr.numSamples > 1) { struct pipe_resource *resolve_target = spr->resolve_target; - /* Do an inline surface resolve into the resolve target resource - * XXX: This works, just not optimal. Work on using a pipelined blit. */ - swr_do_msaa_resolve(resource, resolve_target); - /* Once resolved, copy into display target */ SWR_SURFACE_STATE *resolve = &swr_resource(resolve_target)->swr;