From 07955d4f2b969efb59b9c35c1fba5a0cae2cdc55 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 30 Jul 2013 22:29:28 +0200 Subject: [PATCH] radeonsi: implement uncompressed MSAA rendering and color resolving MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is basic MSAA support which should work with most apps. Some features are missing, those will be implemented by other commits. Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/r600_blit.c | 132 ++++++++- src/gallium/drivers/radeonsi/r600_resource.h | 1 + src/gallium/drivers/radeonsi/r600_texture.c | 7 +- src/gallium/drivers/radeonsi/radeonsi_pipe.c | 1 + src/gallium/drivers/radeonsi/radeonsi_pipe.h | 2 + src/gallium/drivers/radeonsi/si_state.c | 289 +++++++++++++++++-- src/gallium/drivers/radeonsi/si_state.h | 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 3 +- src/gallium/drivers/radeonsi/sid.h | 10 + 9 files changed, 423 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/radeonsi/r600_blit.c b/src/gallium/drivers/radeonsi/r600_blit.c index bdd9bb43c10..7ac92d4ac14 100644 --- a/src/gallium/drivers/radeonsi/r600_blit.c +++ b/src/gallium/drivers/radeonsi/r600_blit.c @@ -43,6 +43,8 @@ enum r600_blitter_op /* bitmask */ R600_DISABLE_RENDER_COND, R600_DECOMPRESS = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND, + + R600_COLOR_RESOLVE = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND }; static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op) @@ -463,22 +465,146 @@ static void r600_resource_copy_region(struct pipe_context *ctx, r600_reset_blittable_to_orig(dst, dst_level, &orig_info[1]); } +static boolean is_simple_msaa_resolve(const struct pipe_blit_info *info) +{ + unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level); + unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level); + struct r600_texture *dst = (struct r600_texture*)info->dst.resource; + unsigned dst_tile_mode = dst->surface.level[info->dst.level].mode; + + return info->dst.resource->format == info->src.resource->format && + info->dst.resource->format == info->dst.format && + info->src.resource->format == info->src.format && + !info->scissor_enable && + info->mask == PIPE_MASK_RGBA && + dst_width == info->src.resource->width0 && + dst_height == info->src.resource->height0 && + info->dst.box.x == 0 && + info->dst.box.y == 0 && + info->dst.box.width == dst_width && + info->dst.box.height == dst_height && + info->src.box.x == 0 && + info->src.box.y == 0 && + info->src.box.width == dst_width && + info->src.box.height == dst_height && + /* Dst must be tiled. If it's not, we have to use a temporary + * resource which is tiled. */ + dst_tile_mode >= RADEON_SURF_MODE_1D; +} + +/* For MSAA integer resolving to work, we change the format to NORM using this function. */ +static enum pipe_format int_to_norm_format(enum pipe_format format) +{ + switch (format) { +#define REPLACE_FORMAT_SIGN(format,sign) \ + case PIPE_FORMAT_##format##_##sign##INT: \ + return PIPE_FORMAT_##format##_##sign##NORM +#define REPLACE_FORMAT(format) \ + REPLACE_FORMAT_SIGN(format, U); \ + REPLACE_FORMAT_SIGN(format, S) + + REPLACE_FORMAT_SIGN(B10G10R10A2, U); + REPLACE_FORMAT(R8); + REPLACE_FORMAT(R8G8); + REPLACE_FORMAT(R8G8B8X8); + REPLACE_FORMAT(R8G8B8A8); + REPLACE_FORMAT(A8); + REPLACE_FORMAT(I8); + REPLACE_FORMAT(L8); + REPLACE_FORMAT(L8A8); + REPLACE_FORMAT(R16); + REPLACE_FORMAT(R16G16); + REPLACE_FORMAT(R16G16B16X16); + REPLACE_FORMAT(R16G16B16A16); + REPLACE_FORMAT(A16); + REPLACE_FORMAT(I16); + REPLACE_FORMAT(L16); + REPLACE_FORMAT(L16A16); + +#undef REPLACE_FORMAT +#undef REPLACE_FORMAT_SIGN + default: + return format; + } +} + +static void si_msaa_color_resolve(struct pipe_context *ctx, + const struct pipe_blit_info *info) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct pipe_screen *screen = ctx->screen; + struct pipe_resource *tmp, templ; + struct pipe_blit_info blit; + unsigned sample_mask = ~0; + + assert(info->src.level == 0); + assert(info->src.box.depth == 1); + assert(info->dst.box.depth == 1); + + if (is_simple_msaa_resolve(info)) { + r600_blitter_begin(ctx, R600_COLOR_RESOLVE); + util_blitter_custom_resolve_color(rctx->blitter, + info->dst.resource, info->dst.level, + info->dst.box.z, + info->src.resource, info->src.box.z, + sample_mask, rctx->custom_blend_resolve, + int_to_norm_format(info->dst.format)); + r600_blitter_end(ctx); + return; + } + + /* resolve into a temporary texture, then blit */ + templ.target = PIPE_TEXTURE_2D; + templ.format = info->src.resource->format; + templ.width0 = info->src.resource->width0; + templ.height0 = info->src.resource->height0; + templ.depth0 = 1; + templ.array_size = 1; + templ.last_level = 0; + templ.nr_samples = 0; + templ.usage = PIPE_USAGE_STATIC; + templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + templ.flags = R600_RESOURCE_FLAG_FORCE_TILING; /* dst must not have a linear layout */ + + tmp = screen->resource_create(screen, &templ); + + /* resolve */ + r600_blitter_begin(ctx, R600_COLOR_RESOLVE); + util_blitter_custom_resolve_color(rctx->blitter, + tmp, 0, 0, + info->src.resource, info->src.box.z, + sample_mask, rctx->custom_blend_resolve, + int_to_norm_format(tmp->format)); + r600_blitter_end(ctx); + + /* blit */ + blit = *info; + blit.src.resource = tmp; + blit.src.box.z = 0; + + r600_blitter_begin(ctx, R600_BLIT); + util_blitter_blit(rctx->blitter, &blit); + r600_blitter_end(ctx); + + pipe_resource_reference(&tmp, NULL); +} + static void si_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) { struct r600_context *rctx = (struct r600_context*)ctx; struct r600_texture *rsrc = (struct r600_texture*)info->src.resource; - assert(util_blitter_is_blit_supported(rctx->blitter, info)); - if (info->src.resource->nr_samples > 1 && info->dst.resource->nr_samples <= 1 && !util_format_is_depth_or_stencil(info->src.resource->format) && !util_format_is_pure_integer(info->src.resource->format)) { - debug_printf("radeonsi: color resolve is unimplemented\n"); + si_msaa_color_resolve(ctx, info); return; } + assert(util_blitter_is_blit_supported(rctx->blitter, info)); + if (rsrc->is_depth && !rsrc->is_flushing_texture) { si_blit_decompress_depth_in_place(rctx, rsrc, info->src.level, info->src.level, diff --git a/src/gallium/drivers/radeonsi/r600_resource.h b/src/gallium/drivers/radeonsi/r600_resource.h index 24db2a90dae..ca8121f32f9 100644 --- a/src/gallium/drivers/radeonsi/r600_resource.h +++ b/src/gallium/drivers/radeonsi/r600_resource.h @@ -28,6 +28,7 @@ /* flag to indicate a resource is to be used as a transfer so should not be tiled */ #define R600_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV #define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) +#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) /* Texture transfer. */ struct r600_transfer { diff --git a/src/gallium/drivers/radeonsi/r600_texture.c b/src/gallium/drivers/radeonsi/r600_texture.c index 9c0b75b6656..185d987587d 100644 --- a/src/gallium/drivers/radeonsi/r600_texture.c +++ b/src/gallium/drivers/radeonsi/r600_texture.c @@ -102,8 +102,9 @@ static int r600_init_surface(struct r600_screen *rscreen, } } - surface->nsamples = 1; + surface->nsamples = ptex->nr_samples ? ptex->nr_samples : 1; surface->flags = 0; + switch (array_mode) { case V_009910_ARRAY_1D_TILED_THIN1: surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE); @@ -529,7 +530,9 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen, if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && !(templ->bind & PIPE_BIND_SCANOUT)) { - if (util_format_is_compressed(templ->format)) { + if (templ->flags & R600_RESOURCE_FLAG_FORCE_TILING) { + array_mode = V_009910_ARRAY_2D_TILED_THIN1; + } else if (util_format_is_compressed(templ->format)) { array_mode = V_009910_ARRAY_1D_TILED_THIN1; } else { if (rscreen->chip_class >= CIK) diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c index 9afc7f2714b..e530ce04401 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c @@ -189,6 +189,7 @@ static void r600_destroy_context(struct pipe_context *context) rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush_depth); rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush_stencil); rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush_inplace); + rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_resolve); util_unreference_framebuffer_state(&rctx->framebuffer); util_blitter_destroy(rctx->blitter); diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index 674c6303b7a..fa6a5399880 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -140,6 +140,7 @@ struct r600_context { void *custom_dsa_flush_depth; void *custom_dsa_flush_stencil; void *custom_dsa_flush_inplace; + void *custom_blend_resolve; struct r600_screen *screen; struct radeon_winsys *ws; @@ -152,6 +153,7 @@ struct r600_context { struct si_vertex_element *vertex_elements; struct pipe_framebuffer_state framebuffer; + unsigned fb_log_samples; unsigned pa_sc_line_stipple; unsigned pa_su_sc_mode_cntl; /* for saving when using blitter */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 7d637e75189..ccd826e0ec6 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -243,8 +243,9 @@ static uint32_t si_translate_blend_factor(int blend_fact) return 0; } -static void *si_create_blend_state(struct pipe_context *ctx, - const struct pipe_blend_state *state) +static void *si_create_blend_state_mode(struct pipe_context *ctx, + const struct pipe_blend_state *state, + unsigned mode) { struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); struct si_pm4_state *pm4 = &blend->pm4; @@ -254,7 +255,7 @@ static void *si_create_blend_state(struct pipe_context *ctx, if (blend == NULL) return NULL; - color_control = S_028808_MODE(V_028808_CB_NORMAL); + color_control = S_028808_MODE(mode); if (state->logicop_enable) { color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); } else { @@ -262,8 +263,12 @@ static void *si_create_blend_state(struct pipe_context *ctx, } si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); - si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, ~0); - si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, ~0); + si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, + S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | + S_028B70_ALPHA_TO_MASK_OFFSET0(2) | + S_028B70_ALPHA_TO_MASK_OFFSET1(2) | + S_028B70_ALPHA_TO_MASK_OFFSET2(2) | + S_028B70_ALPHA_TO_MASK_OFFSET3(2)); blend->cb_target_mask = 0; for (int i = 0; i < 8; i++) { @@ -304,6 +309,12 @@ static void *si_create_blend_state(struct pipe_context *ctx, return blend; } +static void *si_create_blend_state(struct pipe_context *ctx, + const struct pipe_blend_state *state) +{ + return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); +} + static void si_bind_blend_state(struct pipe_context *ctx, void *state) { struct r600_context *rctx = (struct r600_context *)ctx; @@ -579,11 +590,12 @@ static void *si_create_rs_state(struct pipe_context *ctx, tmp = (unsigned)state->line_width * 8; si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, - S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable)); + S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | + S_028A48_MSAA_ENABLE(state->multisample)); - si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL, 0x00000400); si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, - S_028BE4_PIX_CENTER(state->half_pixel_center)); + S_028BE4_PIX_CENTER(state->half_pixel_center) | + S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000); si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000); si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000); @@ -749,7 +761,6 @@ static void *si_create_dsa_state(struct pipe_context *ctx, si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); - si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00); dsa->db_render_override = db_render_override; return dsa; @@ -1660,6 +1671,7 @@ boolean si_is_format_supported(struct pipe_screen *screen, unsigned sample_count, unsigned usage) { + struct r600_screen *rscreen = (struct r600_screen *)screen; unsigned retval = 0; if (target >= PIPE_MAX_TEXTURE_TYPES) { @@ -1670,9 +1682,19 @@ boolean si_is_format_supported(struct pipe_screen *screen, if (!util_format_is_supported(format, usage)) return FALSE; - /* Multisample */ - if (sample_count > 1) - return FALSE; + if (sample_count > 1) { + if (rscreen->chip_class >= CIK) + return FALSE; + + switch (sample_count) { + case 2: + case 4: + case 8: + break; + default: + return FALSE; + } + } if ((usage & PIPE_BIND_SAMPLER_VIEW) && si_is_sampler_format_supported(screen, format)) { @@ -1823,6 +1845,12 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4, color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) | S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1); + if (rtex->resource.b.b.nr_samples > 1) { + unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); + color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | + S_028C74_NUM_FRAGMENTS(log_samples); + } + offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture); offset >>= 8; @@ -1905,6 +1933,10 @@ static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4, db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); z_info = S_028040_FORMAT(format); + if (rtex->resource.b.b.nr_samples > 1) { + z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); + } + if (rtex->surface.flags & RADEON_SURF_SBUFFER) s_info = S_028044_FORMAT(V_028044_STENCIL_8); else @@ -1969,13 +2001,205 @@ static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4, si_pm4_set_reg(pm4, R_02805C_DB_DEPTH_SLICE, S_02805C_SLICE_TILE_MAX(slice)); } +#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ + (((s0x) & 0xf) | (((s0y) & 0xf) << 4) | \ + (((s1x) & 0xf) << 8) | (((s1y) & 0xf) << 12) | \ + (((s2x) & 0xf) << 16) | (((s2y) & 0xf) << 20) | \ + (((s3x) & 0xf) << 24) | (((s3y) & 0xf) << 28)) + +/* 2xMSAA + * There are two locations (-4, 4), (4, -4). */ +static uint32_t sample_locs_2x[] = { + FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), + FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), + FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), + FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), +}; +static unsigned max_dist_2x = 4; +/* 4xMSAA + * There are 4 locations: (-2, -2), (2, 2), (-6, 6), (6, -6). */ +static uint32_t sample_locs_4x[] = { + FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), + FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), + FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), + FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), +}; +static unsigned max_dist_4x = 6; +/* Cayman/SI 8xMSAA */ +static uint32_t cm_sample_locs_8x[] = { + FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2), + FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2), + FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2), + FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2), + FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4), + FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4), + FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4), + FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4), +}; +static unsigned cm_max_dist_8x = 8; +/* Cayman/SI 16xMSAA */ +static uint32_t cm_sample_locs_16x[] = { + FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5), + FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5), + FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5), + FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5), + FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1), + FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1), + FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1), + FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1), + FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6), + FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6), + FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6), + FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6), + FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0), + FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0), + FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0), + FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0), +}; +static unsigned cm_max_dist_16x = 8; + +static void si_get_sample_position(struct pipe_context *ctx, + unsigned sample_count, + unsigned sample_index, + float *out_value) +{ + int offset, index; + struct { + int idx:4; + } val; + switch (sample_count) { + case 1: + default: + out_value[0] = out_value[1] = 0.5; + break; + case 2: + offset = 4 * (sample_index * 2); + val.idx = (sample_locs_2x[0] >> offset) & 0xf; + out_value[0] = (float)(val.idx + 8) / 16.0f; + val.idx = (sample_locs_2x[0] >> (offset + 4)) & 0xf; + out_value[1] = (float)(val.idx + 8) / 16.0f; + break; + case 4: + offset = 4 * (sample_index * 2); + val.idx = (sample_locs_4x[0] >> offset) & 0xf; + out_value[0] = (float)(val.idx + 8) / 16.0f; + val.idx = (sample_locs_4x[0] >> (offset + 4)) & 0xf; + out_value[1] = (float)(val.idx + 8) / 16.0f; + break; + case 8: + offset = 4 * (sample_index % 4 * 2); + index = (sample_index / 4) * 4; + val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf; + out_value[0] = (float)(val.idx + 8) / 16.0f; + val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf; + out_value[1] = (float)(val.idx + 8) / 16.0f; + break; + case 16: + offset = 4 * (sample_index % 4 * 2); + index = (sample_index / 4) * 4; + val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf; + out_value[0] = (float)(val.idx + 8) / 16.0f; + val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf; + out_value[1] = (float)(val.idx + 8) / 16.0f; + break; + } +} + +static void si_set_msaa_state(struct r600_context *rctx, struct si_pm4_state *pm4, int nr_samples) +{ + unsigned max_dist = 0; + + switch (nr_samples) { + default: + nr_samples = 0; + break; + case 2: + si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x[0]); + si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x[1]); + si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x[2]); + si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x[3]); + max_dist = max_dist_2x; + break; + case 4: + si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x[0]); + si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x[1]); + si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x[2]); + si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x[3]); + max_dist = max_dist_4x; + break; + case 8: + si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, cm_sample_locs_8x[0]); + si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, cm_sample_locs_8x[4]); + si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, 0); + si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, 0); + si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, cm_sample_locs_8x[1]); + si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, cm_sample_locs_8x[5]); + si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, 0); + si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, 0); + si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, cm_sample_locs_8x[2]); + si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, cm_sample_locs_8x[6]); + si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, 0); + si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, 0); + si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, cm_sample_locs_8x[3]); + si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, cm_sample_locs_8x[7]); + max_dist = cm_max_dist_8x; + break; + case 16: + si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, cm_sample_locs_16x[0]); + si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, cm_sample_locs_16x[4]); + si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, cm_sample_locs_16x[8]); + si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, cm_sample_locs_16x[12]); + si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, cm_sample_locs_16x[1]); + si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, cm_sample_locs_16x[5]); + si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, cm_sample_locs_16x[9]); + si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, cm_sample_locs_16x[13]); + si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, cm_sample_locs_16x[2]); + si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, cm_sample_locs_16x[6]); + si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, cm_sample_locs_16x[10]); + si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, cm_sample_locs_16x[14]); + si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, cm_sample_locs_16x[3]); + si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, cm_sample_locs_16x[7]); + si_pm4_set_reg(pm4, R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2, cm_sample_locs_16x[11]); + si_pm4_set_reg(pm4, R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3, cm_sample_locs_16x[15]); + max_dist = cm_max_dist_16x; + break; + } + + if (nr_samples > 1) { + unsigned log_samples = util_logbase2(nr_samples); + + si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL, + S_028BDC_LAST_PIXEL(1) | + S_028BDC_EXPAND_LINE_WIDTH(1)); + si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, + S_028BE0_MSAA_NUM_SAMPLES(log_samples) | + S_028BE0_MAX_SAMPLE_DIST(max_dist) | + S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); + + si_pm4_set_reg(pm4, R_028804_DB_EQAA, + S_028804_MAX_ANCHOR_SAMPLES(log_samples) | + S_028804_PS_ITER_SAMPLES(log_samples) | + S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | + S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) | + S_028804_HIGH_QUALITY_INTERSECTIONS(1) | + S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); + } else { + si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL, S_028BDC_LAST_PIXEL(1)); + si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0); + + si_pm4_set_reg(pm4, R_028804_DB_EQAA, + S_028804_HIGH_QUALITY_INTERSECTIONS(1) | + S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); + } +} + static void si_set_framebuffer_state(struct pipe_context *ctx, const struct pipe_framebuffer_state *state) { struct r600_context *rctx = (struct r600_context *)ctx; struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); uint32_t tl, br; - int tl_x, tl_y, br_x, br_y; + int tl_x, tl_y, br_x, br_y, nr_samples; if (pm4 == NULL) return; @@ -2013,7 +2237,16 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, si_pm4_set_reg(pm4, R_028208_PA_SC_WINDOW_SCISSOR_BR, br); si_pm4_set_reg(pm4, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000); si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); - si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0x00000000); + + if (state->nr_cbufs) + nr_samples = state->cbufs[0]->texture->nr_samples; + else if (state->zsbuf) + nr_samples = state->zsbuf->texture->nr_samples; + else + nr_samples = 0; + + si_set_msaa_state(rctx, pm4, nr_samples); + rctx->fb_log_samples = util_logbase2(nr_samples); si_pm4_set_state(rctx, framebuffer, pm4); si_update_fb_rs_state(rctx); @@ -2642,8 +2875,19 @@ static void si_bind_ps_sampler_states(struct pipe_context *ctx, unsigned count, si_pm4_set_state(rctx, ps_sampler, pm4); } -static void si_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) { + struct r600_context *rctx = (struct r600_context *)ctx; + struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); + uint16_t mask = sample_mask; + + if (pm4 == NULL) + return; + + si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16)); + si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16)); + + si_pm4_set_state(rctx, sample_mask, pm4); } static void si_delete_sampler_state(struct pipe_context *ctx, void *state) @@ -2813,6 +3057,16 @@ static void si_texture_barrier(struct pipe_context *ctx) si_pm4_set_state(rctx, texture_barrier, pm4); } +static void *si_create_resolve_blend(struct r600_context *rctx) +{ + struct pipe_blend_state blend; + + memset(&blend, 0, sizeof(blend)); + blend.independent_blend_enable = true; + blend.rt[0].colormask = 0xf; + return si_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_RESOLVE); +} + void si_init_state_functions(struct r600_context *rctx) { rctx->context.create_blend_state = si_create_blend_state; @@ -2827,10 +3081,12 @@ void si_init_state_functions(struct r600_context *rctx) rctx->context.create_depth_stencil_alpha_state = si_create_dsa_state; rctx->context.bind_depth_stencil_alpha_state = si_bind_dsa_state; rctx->context.delete_depth_stencil_alpha_state = si_delete_dsa_state; + rctx->custom_dsa_flush_depth_stencil = si_create_db_flush_dsa(rctx, true, true); rctx->custom_dsa_flush_depth = si_create_db_flush_dsa(rctx, true, false); rctx->custom_dsa_flush_stencil = si_create_db_flush_dsa(rctx, false, true); rctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(rctx, false, false); + rctx->custom_blend_resolve = si_create_resolve_blend(rctx); rctx->context.set_clip_state = si_set_clip_state; rctx->context.set_scissor_states = si_set_scissor_states; @@ -2838,6 +3094,7 @@ void si_init_state_functions(struct r600_context *rctx) rctx->context.set_stencil_ref = si_set_pipe_stencil_ref; rctx->context.set_framebuffer_state = si_set_framebuffer_state; + rctx->context.get_sample_position = si_get_sample_position; rctx->context.create_vs_state = si_create_vs_state; rctx->context.create_fs_state = si_create_fs_state; @@ -2918,8 +3175,6 @@ void si_init_config(struct r600_context *rctx) si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); - si_pm4_set_reg(pm4, R_028804_DB_EQAA, 0x110000); - si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); if (rctx->chip_class >= CIK) { diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 610303bb9a5..bc121048f2c 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -86,6 +86,7 @@ union si_state { struct si_state_blend *blend; struct si_pm4_state *blend_color; struct si_pm4_state *clip; + struct si_pm4_state *sample_mask; struct si_pm4_state *scissor; struct si_state_viewport *viewport; struct si_pm4_state *framebuffer; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index f03b34f4039..ceaead0b71a 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -606,7 +606,8 @@ static void si_state_draw(struct r600_context *rctx, struct si_state_dsa *dsa = rctx->queued.named.dsa; si_pm4_set_reg(pm4, R_028004_DB_COUNT_CONTROL, - S_028004_PERFECT_ZPASS_COUNTS(1)); + S_028004_PERFECT_ZPASS_COUNTS(1) | + S_028004_SAMPLE_RATE(rctx->fb_log_samples)); si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, dsa->db_render_override | S_02800C_NOOP_CULL_DISABLE(1)); diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index 57ce72e0628..86394075cf0 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -6600,6 +6600,16 @@ #define G_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS(x) (((x) >> 31) & 0x1) #define C_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS 0x7FFFFFFF #define R_028804_DB_EQAA 0x028804 +#define S_028804_MAX_ANCHOR_SAMPLES(x) (((x) & 0x7) << 0) +#define S_028804_PS_ITER_SAMPLES(x) (((x) & 0x7) << 4) +#define S_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) & 0x7) << 8) +#define S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) & 0x7) << 12) +#define S_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) & 0x1) << 16) +#define S_028804_INCOHERENT_EQAA_READS(x) (((x) & 0x1) << 17) +#define S_028804_INTERPOLATE_COMP_Z(x) (((x) & 0x1) << 18) +#define S_028804_INTERPOLATE_SRC_Z(x) (((x) & 0x1) << 19) +#define S_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) & 0x1) << 20) +#define S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) & 0x1) << 21) #define R_028808_CB_COLOR_CONTROL 0x028808 #define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) #define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) -- 2.30.2