From 1832ef6cd9bdce4f546128c0b77d7acd6fd898a7 Mon Sep 17 00:00:00 2001 From: Bruce Cherniak Date: Thu, 13 Apr 2017 17:40:11 -0500 Subject: [PATCH] swr: Enable MSAA in OpenSWR software renderer This patch enables multisample antialiasing in the OpenSWR software renderer. MSAA is a proof-of-concept/work-in-progress with bug fixes and performance on the way. We wanted to get the changes out now to allow several customers to begin experimenting with MSAA in a software renderer. So as not to impact current customers, MSAA is turned off by default - previous functionality and performance remain intact. It is easily enabled via environment variables, as described below. It has only been tested with the glx-lib winsys. The intention is to enable other state-trackers, both Windows and Linux and more fully support FBOs. There are 2 environment variables that affect behavior: * SWR_MSAA_FORCE_ENABLE - force MSAA on, for apps that are not designed for MSAA... Beware, results will vary. This is mainly for testing. * SWR_MSAA_MAX_COUNT - sets maximum supported number of samples (1,2,4,8,16), or 0 to disable MSAA altogether. (The default is currently 0.) Reviewed-by: George Kyriazis --- src/gallium/drivers/swr/swr_context.cpp | 90 +++++++++++++- src/gallium/drivers/swr/swr_context.h | 3 + src/gallium/drivers/swr/swr_resource.h | 4 + src/gallium/drivers/swr/swr_screen.cpp | 159 ++++++++++++++++++++++-- src/gallium/drivers/swr/swr_screen.h | 8 ++ src/gallium/drivers/swr/swr_state.cpp | 74 +++++++++-- 6 files changed, 313 insertions(+), 25 deletions(-) diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index 6f46d666ac5..aa5cca8e653 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -267,20 +267,104 @@ swr_resource_copy(struct pipe_context *pipe, } +/* XXX: This resolve is incomplete and suboptimal. It will be removed once the + * pipelined resolve blit works. 
*/ +void +swr_do_msaa_resolve(struct pipe_resource *src_resource, + struct pipe_resource *dst_resource) +{ + /* This is a pretty dumb inline resolve. It only supports 8-bit formats + * (ex RGBA8/BGRA8) - which are most common display formats anyway. + */ + + /* quick check for 8-bit and number of components */ + uint8_t bits_per_component = + util_format_get_component_bits(src_resource->format, + UTIL_FORMAT_COLORSPACE_RGB, 0); + + /* Unsupported resolve format */ + assert(src_resource->format == dst_resource->format); + assert(bits_per_component == 8); + if ((src_resource->format != dst_resource->format) || + (bits_per_component != 8)) { + return; + } + + uint8_t src_num_comps = util_format_get_nr_components(src_resource->format); + + SWR_SURFACE_STATE *src_surface = &swr_resource(src_resource)->swr; + SWR_SURFACE_STATE *dst_surface = &swr_resource(dst_resource)->swr; + + uint32_t *src, *dst, offset; + uint32_t num_samples = src_surface->numSamples; + float recip_num_samples = 1.0f / num_samples; + for (uint32_t y = 0; y < src_surface->height; y++) { + for (uint32_t x = 0; x < src_surface->width; x++) { + float r = 0.0f; + float g = 0.0f; + float b = 0.0f; + float a = 0.0f; + for (uint32_t sampleNum = 0; sampleNum < num_samples; sampleNum++) { + offset = ComputeSurfaceOffset(x, y, 0, 0, sampleNum, 0, src_surface); + src = (uint32_t *) src_surface->pBaseAddress + offset/src_num_comps; + const uint32_t sample = *src; + r += (float)((sample >> 24) & 0xff) / 255.0f * recip_num_samples; + g += (float)((sample >> 16) & 0xff) / 255.0f * recip_num_samples; + b += (float)((sample >> 8) & 0xff) / 255.0f * recip_num_samples; + a += (float)((sample ) & 0xff) / 255.0f * recip_num_samples; + } + uint32_t result = 0; + result = ((uint8_t)(r * 255.0f) & 0xff) << 24; + result |= ((uint8_t)(g * 255.0f) & 0xff) << 16; + result |= ((uint8_t)(b * 255.0f) & 0xff) << 8; + result |= ((uint8_t)(a * 255.0f) & 0xff); + offset = ComputeSurfaceOffset(x, y, 0, 0, 0, 0, src_surface); + dst = 
(uint32_t *) dst_surface->pBaseAddress + offset/src_num_comps; + *dst = result; + } + } +} + + static void swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info) { struct swr_context *ctx = swr_context(pipe); + /* Make a copy of the const blit_info, so we can modify it */ struct pipe_blit_info info = *blit_info; - if (blit_info->render_condition_enable && !swr_check_render_cond(pipe)) + if (info.render_condition_enable && !swr_check_render_cond(pipe)) return; if (info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1 && !util_format_is_depth_or_stencil(info.src.resource->format) && !util_format_is_pure_integer(info.src.resource->format)) { - debug_printf("swr: color resolve unimplemented\n"); - return; + debug_printf("swr_blit: color resolve : %d -> %d\n", + info.src.resource->nr_samples, info.dst.resource->nr_samples); + + /* Because the resolve is being done inline (not pipelined), + * resources need to be stored out of hottiles and the pipeline empty. + * + * Resources are marked unused following fence finish because all + * pipeline operations are complete. Validation of the blit will mark + * them are read/write again. + */ + swr_store_dirty_resource(pipe, info.src.resource, SWR_TILE_RESOLVED); + swr_store_dirty_resource(pipe, info.dst.resource, SWR_TILE_RESOLVED); + swr_fence_finish(pipe->screen, NULL, swr_screen(pipe->screen)->flush_fence, 0); + swr_resource_unused(info.src.resource); + swr_resource_unused(info.dst.resource); + + struct pipe_resource *src_resource = info.src.resource; + struct pipe_resource *resolve_target = + swr_resource(src_resource)->resolve_target; + + /* Inline resolve samples into resolve target resource, then continue + * the blit. */ + swr_do_msaa_resolve(src_resource, resolve_target); + + /* The resolve target becomes the new source for the blit. 
*/ + info.src.resource = resolve_target; } if (util_try_blit_via_copy_region(pipe, &info)) { diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h index be65a20c85b..233d95ab167 100644 --- a/src/gallium/drivers/swr/swr_context.h +++ b/src/gallium/drivers/swr/swr_context.h @@ -196,4 +196,7 @@ void swr_clear_init(struct pipe_context *pipe); void swr_draw_init(struct pipe_context *pipe); void swr_finish(struct pipe_context *pipe); + +void swr_do_msaa_resolve(struct pipe_resource *src_resource, + struct pipe_resource *dst_resource); #endif diff --git a/src/gallium/drivers/swr/swr_resource.h b/src/gallium/drivers/swr/swr_resource.h index 41abd77cba0..ae9954c1e7d 100644 --- a/src/gallium/drivers/swr/swr_resource.h +++ b/src/gallium/drivers/swr/swr_resource.h @@ -46,6 +46,10 @@ struct swr_resource { struct sw_displaytarget *display_target; + /* If resource is multisample, then this points to a alternate resource + * containing the resolved multisample surface, otherwise null */ + struct pipe_resource *resolve_target; + size_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; size_t secondary_mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index 87fd898eb11..f88989b0e21 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -61,6 +61,11 @@ #define SWR_MAX_TEXTURE_CUBE_LEVELS 14 /* 8K x 8K for now */ #define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */ +/* Flag indicates creation of alternate surface, to prevent recursive loop + * in resource creation when msaa_force_enable is set. 
*/ +#define SWR_RESOURCE_FLAG_ALT_SURFACE (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) + + static const char * swr_get_name(struct pipe_screen *screen) { @@ -78,13 +83,14 @@ swr_get_vendor(struct pipe_screen *screen) } static boolean -swr_is_format_supported(struct pipe_screen *screen, +swr_is_format_supported(struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, unsigned bind) { - struct sw_winsys *winsys = swr_screen(screen)->winsys; + struct swr_screen *screen = swr_screen(_screen); + struct sw_winsys *winsys = screen->winsys; const struct util_format_description *format_desc; assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D @@ -100,7 +106,8 @@ swr_is_format_supported(struct pipe_screen *screen, if (!format_desc) return FALSE; - if (sample_count > 1) + if ((sample_count > screen->msaa_max_count) + || !util_is_power_of_two(sample_count)) return FALSE; if (bind & PIPE_BIND_DISPLAY_TARGET) { @@ -235,7 +242,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_QUERY_TIMESTAMP: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: - case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_UMA: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: @@ -250,6 +256,15 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DOUBLES: return 1; + /* MSAA support + * If user has explicitly set max_sample_count = 0 (via SWR_MSAA_MAX_COUNT) + * then disable all MSAA support and go back to old caps. */ + case PIPE_CAP_TEXTURE_MULTISAMPLE: + case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: + return swr_screen(screen)->msaa_max_count ? 1 : 0; + case PIPE_CAP_FAKE_SW_MSAA: + return swr_screen(screen)->msaa_max_count ? 
0 : 1; + /* unsupported features */ case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: @@ -264,7 +279,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_TEXTURE_MULTISAMPLE: case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: @@ -276,7 +290,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_VERTEXID_NOBASE: - case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: @@ -712,6 +725,14 @@ swr_texture_layout(struct swr_screen *screen, if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) { res->swr.halign = KNOB_MACROTILE_X_DIM; res->swr.valign = KNOB_MACROTILE_Y_DIM; + + /* If SWR_MSAA_FORCE_ENABLE is set, turn on MSAA and override requested + * surface sample count. 
*/ + if (screen->msaa_force_enable) { + res->swr.numSamples = screen->msaa_max_count; + fprintf(stderr,"swr_texture_layout: forcing sample count: %d\n", + res->swr.numSamples); + } } else { res->swr.halign = 1; res->swr.valign = 1; @@ -790,8 +811,8 @@ swr_texture_layout(struct swr_screen *screen, ComputeSurfaceOffset(0, 0, 0, 0, 0, level, &res->swr); } - size_t total_size = - (size_t)res->swr.depth * res->swr.qpitch * res->swr.pitch; + size_t total_size = res->swr.depth * res->swr.qpitch * res->swr.pitch * + res->swr.numSamples; if (total_size > SWR_MAX_TEXTURE_SIZE) return false; @@ -808,9 +829,11 @@ swr_texture_layout(struct swr_screen *screen, ComputeSurfaceOffset(0, 0, 0, 0, 0, level, &res->secondary); } - res->secondary.pBaseAddress = (uint8_t *)AlignedMalloc( - res->secondary.depth * res->secondary.qpitch * - res->secondary.pitch, 64); + total_size = res->secondary.depth * res->secondary.qpitch * + res->secondary.pitch * res->secondary.numSamples; + + res->secondary.pBaseAddress = (uint8_t *) AlignedMalloc(total_size, + 64); } } @@ -827,6 +850,51 @@ swr_can_create_resource(struct pipe_screen *screen, return swr_texture_layout(swr_screen(screen), &res, false); } +/* Helper function that conditionally creates a single-sample resolve resource + * and attaches it to main multisample resource. */ +static boolean +swr_create_resolve_resource(struct pipe_screen *_screen, + struct swr_resource *msaa_res) +{ + struct swr_screen *screen = swr_screen(_screen); + + /* If resource is multisample, create a single-sample resolve resource */ + if (msaa_res->base.nr_samples > 1 || (screen->msaa_force_enable && + !(msaa_res->base.flags & SWR_RESOURCE_FLAG_ALT_SURFACE))) { + + /* Create a single-sample copy of the resource. 
Copy the original + * resource parameters and set flag to prevent recursion when re-calling + * resource_create */ + struct pipe_resource alt_template = msaa_res->base; + alt_template.nr_samples = 0; + alt_template.flags |= SWR_RESOURCE_FLAG_ALT_SURFACE; + + /* Note: Display_target is a special single-sample resource, only the + * display_target has been created already. */ + if (msaa_res->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT + | PIPE_BIND_SHARED)) { + /* Allocate the multisample buffers. */ + if (!swr_texture_layout(screen, msaa_res, true)) + return false; + + /* Alt resource will only be bound as PIPE_BIND_RENDER_TARGET + * remove the DISPLAY_TARGET, SCANOUT, and SHARED bindings */ + alt_template.bind = PIPE_BIND_RENDER_TARGET; + } + + /* Allocate single-sample resolve surface */ + struct pipe_resource *alt; + alt = _screen->resource_create(_screen, &alt_template); + if (!alt) + return false; + + /* Attach it to the multisample resource */ + msaa_res->resolve_target = alt; + } + + return true; /* success */ +} + static struct pipe_resource * swr_resource_create(struct pipe_screen *_screen, const struct pipe_resource *templat) @@ -845,7 +913,7 @@ swr_resource_create(struct pipe_screen *_screen, | PIPE_BIND_SHARED)) { /* displayable surface * first call swr_texture_layout without allocating to finish - * filling out the SWR_SURFAE_STATE in res */ + * filling out the SWR_SURFACE_STATE in res */ swr_texture_layout(screen, res, false); if (!swr_displaytarget_layout(screen, res)) goto fail; @@ -854,6 +922,12 @@ swr_resource_create(struct pipe_screen *_screen, if (!swr_texture_layout(screen, res, true)) goto fail; } + + /* If resource was multisample, create resolve resource and attach + * it to multisample resource. 
*/ + if (!swr_create_resolve_resource(_screen, res)) + goto fail; + } else { /* other data (vertex buffer, const buffer, etc) */ assert(util_format_get_blocksize(templat->format) == 1); @@ -862,7 +936,7 @@ swr_resource_create(struct pipe_screen *_screen, assert(templat->last_level == 0); /* Easiest to just call swr_texture_layout, as it sets up - * SWR_SURFAE_STATE in res */ + * SWR_SURFACE_STATE in res */ if (!swr_texture_layout(screen, res, true)) goto fail; } @@ -888,9 +962,24 @@ swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt) struct sw_winsys *winsys = screen->winsys; winsys->displaytarget_destroy(winsys, spr->display_target); + if (spr->swr.numSamples > 1) { + /* Free an attached resolve resource */ + struct swr_resource *alt = swr_resource(spr->resolve_target); + swr_fence_work_free(screen->flush_fence, alt->swr.pBaseAddress, true); + + /* Free multisample buffer */ + swr_fence_work_free(screen->flush_fence, spr->swr.pBaseAddress, true); + } } else { /* For regular resources, defer deletion */ swr_resource_unused(pt); + + if (spr->swr.numSamples > 1) { + /* Free an attached resolve resource */ + struct swr_resource *alt = swr_resource(spr->resolve_target); + swr_fence_work_free(screen->flush_fence, alt->swr.pBaseAddress, true); + } + swr_fence_work_free(screen->flush_fence, spr->swr.pBaseAddress, true); swr_fence_work_free(screen->flush_fence, spr->secondary.pBaseAddress, true); @@ -919,6 +1008,23 @@ swr_flush_frontbuffer(struct pipe_screen *p_screen, SwrEndFrame(swr_context(pipe)->swrContext); } + /* Multisample surfaces need to be resolved before present */ + if (pipe && spr->swr.numSamples > 1) { + struct pipe_resource *resolve_target = spr->resolve_target; + + /* Do an inline surface resolve into the resolve target resource + * XXX: This works, just not optimal. Work on using a pipelined blit. 
*/ + swr_do_msaa_resolve(resource, resolve_target); + + /* Once resolved, copy into display target */ + SWR_SURFACE_STATE *resolve = &swr_resource(resolve_target)->swr; + + void *map = winsys->displaytarget_map(winsys, spr->display_target, + PIPE_TRANSFER_WRITE); + memcpy(map, resolve->pBaseAddress, resolve->pitch * resolve->height); + winsys->displaytarget_unmap(winsys, spr->display_target); + } + debug_assert(spr->display_target); if (spr->display_target) winsys->displaytarget_display( @@ -986,6 +1092,33 @@ swr_create_screen_internal(struct sw_winsys *winsys) util_format_s3tc_init(); + /* XXX msaa under development, disable by default for now */ + screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */ + + /* validate env override values, within range and power of 2 */ + int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 0); + if (msaa_max_count) { + if ((msaa_max_count < 0) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES) + || !util_is_power_of_two(msaa_max_count)) { + fprintf(stderr, "SWR_MSAA_MAX_COUNT invalid: %d\n", msaa_max_count); + fprintf(stderr, "must be power of 2 between 1 and %d" \ + " (or 0 to disable msaa)\n", + SWR_MAX_NUM_MULTISAMPLES); + msaa_max_count = 0; + } + + fprintf(stderr, "SWR_MSAA_MAX_COUNT: %d\n", msaa_max_count); + if (!msaa_max_count) + fprintf(stderr, "(msaa disabled)\n"); + + screen->msaa_max_count = msaa_max_count; + } + + screen->msaa_force_enable = debug_get_bool_option( + "SWR_MSAA_FORCE_ENABLE", false); + if (screen->msaa_force_enable) + fprintf(stderr, "SWR_MSAA_FORCE_ENABLE: true\n"); + return &screen->base; } diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h index 0c82a2eff7a..dc1bb47f02d 100644 --- a/src/gallium/drivers/swr/swr_screen.h +++ b/src/gallium/drivers/swr/swr_screen.h @@ -24,10 +24,15 @@ #ifndef SWR_SCREEN_H #define SWR_SCREEN_H +#include "swr_resource.h" + #include "pipe/p_screen.h" #include "pipe/p_defines.h" +#include "util/u_format.h" #include 
"api.h" +#include "memory/TilingFunctions.h" + struct sw_winsys; struct swr_screen { @@ -38,6 +43,9 @@ struct swr_screen { struct sw_winsys *winsys; + boolean msaa_force_enable; + uint8_t msaa_max_count; + HANDLE hJitMgr; }; diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index 5cc01ddcab0..aa9fe099f0a 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -30,6 +30,7 @@ #include "common/os.h" #include "jit_api.h" #include "gen_state_llvm.h" +#include "core/multisample.h" #include "gallivm/lp_bld_tgsi.h" #include "util/u_format.h" @@ -668,6 +669,9 @@ swr_set_framebuffer_state(struct pipe_context *pipe, if (changed) { util_copy_framebuffer_state(&ctx->framebuffer, fb); + /* 0 and 1 both indicate no msaa. Core doesn't understand 0 samples */ + ctx->framebuffer.samples = std::max((ubyte)1, ctx->framebuffer.samples); + ctx->dirty |= SWR_NEW_FRAMEBUFFER; } } @@ -684,6 +688,36 @@ swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) } } +/* + * MSAA fixed sample position table + * used by update_derived and get_sample_position + * (integer locations on a 16x16 grid) + */ +static const uint8_t swr_sample_positions[][2] = +{ /* 1x*/ { 8, 8}, + /* 2x*/ {12,12},{ 4, 4}, + /* 4x*/ { 6, 2},{14, 6},{ 2,10},{10,14}, + /* 8x*/ { 9, 5},{ 7,11},{13, 9},{ 5, 3}, + { 3,13},{ 1, 7},{11,15},{15, 1}, + /*16x*/ { 9, 9},{ 7, 5},{ 5,10},{12, 7}, + { 3, 6},{10,13},{13,11},{11, 3}, + { 6,14},{ 8, 1},{ 4, 2},{ 2,12}, + { 0, 8},{15, 4},{14,15},{ 1, 0} }; + +static void +swr_get_sample_position(struct pipe_context *pipe, + unsigned sample_count, unsigned sample_index, + float *out_value) +{ + /* validate sample_count */ + sample_count = GetNumSamples(GetSampleCount(sample_count)); + + const uint8_t *sample = swr_sample_positions[sample_count-1 + sample_index]; + out_value[0] = sample[0] / 16.0f; + out_value[1] = sample[1] / 16.0f; +} + + /* * Update resource in-use status * All resources bound 
to color or depth targets marked as WRITE resources. @@ -1060,9 +1094,30 @@ swr_update_derived(struct pipe_context *pipe, rastState->pointSpriteTopOrigin = rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT; - /* XXX TODO: Add multisample */ - rastState->sampleCount = SWR_MULTISAMPLE_1X; + /* If SWR_MSAA_FORCE_ENABLE is set, turn msaa on */ + if (screen->msaa_force_enable && !rasterizer->multisample) { + /* Force enable and use the value the surface was created with */ + rasterizer->multisample = true; + fb->samples = swr_resource(fb->cbufs[0]->texture)->swr.numSamples; + fprintf(stderr,"msaa force enable: %d samples\n", fb->samples); + } + + rastState->sampleCount = GetSampleCount(fb->samples); rastState->forcedSampleCount = false; + rastState->bIsCenterPattern = !rasterizer->multisample; + rastState->pixelLocation = SWR_PIXEL_LOCATION_CENTER; + + /* Only initialize sample positions if msaa is enabled */ + if (rasterizer->multisample) { + for (uint32_t i = 0; i < fb->samples; i++) { + const uint8_t *sample = swr_sample_positions[fb->samples-1 + i]; + rastState->samplePositions.SetXi(i, sample[0] << 4); + rastState->samplePositions.SetYi(i, sample[1] << 4); + rastState->samplePositions.SetX (i, sample[0] / 16.0f); + rastState->samplePositions.SetY (i, sample[1] / 16.0f); + } + rastState->samplePositions.PrecalcSampleData(fb->samples); + } bool do_offset = false; switch (rasterizer->fill_front) { @@ -1375,9 +1430,9 @@ swr_update_derived(struct pipe_context *pipe, psState.inputCoverage = SWR_INPUT_COVERAGE_NORMAL; psState.writesODepth = ctx->fs->info.base.writes_z; psState.usesSourceDepth = ctx->fs->info.base.reads_z; - psState.shadingRate = SWR_SHADING_RATE_PIXEL; // XXX + psState.shadingRate = SWR_SHADING_RATE_PIXEL; psState.numRenderTargets = ctx->framebuffer.nr_cbufs; - psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE; // XXX msaa + psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE; uint32_t barycentricsMask = 0; #if 0 // when we switch to mesa-master @@ 
-1507,6 +1562,7 @@ swr_update_derived(struct pipe_context *pipe, /* Blend State */ if (ctx->dirty & (SWR_NEW_BLEND | + SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER | SWR_NEW_DEPTH_STENCIL_ALPHA)) { struct pipe_framebuffer_state *fb = &ctx->framebuffer; @@ -1520,9 +1576,8 @@ swr_update_derived(struct pipe_context *pipe, blendState.alphaTestReference = *((uint32_t*)&ctx->depth_stencil->alpha.ref_value); - // XXX MSAA - blendState.sampleMask = 0; - blendState.sampleCount = SWR_MULTISAMPLE_1X; + blendState.sampleMask = ctx->sample_mask; + blendState.sampleCount = GetSampleCount(fb->samples); /* If there are no color buffers bound, disable writes on RT0 * and skip loop */ @@ -1578,8 +1633,8 @@ swr_update_derived(struct pipe_context *pipe, compileState.blendState.alphaBlendFunc); compileState.desc.alphaToCoverageEnable = ctx->blend->pipe.alpha_to_coverage; - compileState.desc.sampleMaskEnable = 0; // XXX - compileState.desc.numSamples = 1; // XXX + compileState.desc.sampleMaskEnable = (blendState.sampleMask != 0); + compileState.desc.numSamples = fb->samples; compileState.alphaTestFunction = swr_convert_depth_func(ctx->depth_stencil->alpha.func); @@ -1781,6 +1836,7 @@ swr_state_init(struct pipe_context *pipe) pipe->set_stencil_ref = swr_set_stencil_ref; pipe->set_sample_mask = swr_set_sample_mask; + pipe->get_sample_position = swr_get_sample_position; pipe->create_stream_output_target = swr_create_so_target; pipe->stream_output_target_destroy = swr_destroy_so_target; -- 2.30.2