From: Marek Olšák Date: Sat, 12 Jan 2013 02:29:40 +0000 (+0100) Subject: r300g: implement MSAA compression and fast MSAA color clear X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ca2c28859eca83f8fbf1f43616f5ef861e95e8d6;p=mesa.git r300g: implement MSAA compression and fast MSAA color clear These are optimizations which make MSAA a lot faster. The MSAA work is complete with this commit (except for enablement of AA optimizations for RGBA16F, for which a patch is ready and waiting until the kernel CS checker fix lands). MSAA can't be made any faster as far as hw programming is concerned. The catch is that only one process and one colorbuffer can use the optimizations at a time. There usually is only one MSAA colorbuffer, so it shouldn't be an issue. Also, there is a limit on the size of MSAA colorbuffer resolution in terms of megapixels. If the limit is surpassed, the AA optimizations are disabled. The limit is: - 1 Mpix on low-end and some mid-level chipsets (1024x768 and 1280x720) - 2 Mpix on some mid-level chipsets (1600x1200 and 1920x1080) - 3 or 4 Mpix on high-end chipsets (2048x1536 or 2560x1600, respectively) The limit corresponds to the number of raster pipes (= GB pipes) available; each pipe can hold 1 Mpix of AA compression data. If the optimizations are enabled, the driver prints to stdout: radeon: Acquired access to AA optimizations. 
--- diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index f8d3b1fd1d1..2bb6063846b 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -26,6 +26,7 @@ #include "r300_reg.h" #include "util/u_format.h" +#include "util/u_half.h" #include "util/u_pack_color.h" #include "util/u_surface.h" @@ -176,6 +177,25 @@ static uint32_t r300_hiz_clear_value(double depth) return r | (r << 8) | (r << 16) | (r << 24); } +static void r300_set_clear_color(struct r300_context *r300, + const union pipe_color_union *color) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + union util_color uc; + + memset(&uc, 0, sizeof(uc)); + util_pack_color(color->f, fb->cbufs[0]->format, &uc); + + if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) { + /* (0,1,2,3) maps to (B,G,R,A) */ + r300->color_clear_value_gb = uc.h[0] | ((uint32_t)uc.h[1] << 16); + r300->color_clear_value_ar = uc.h[2] | ((uint32_t)uc.h[3] << 16); + } else { + r300->color_clear_value = uc.ui; + } +} + DEBUG_GET_ONCE_BOOL_OPTION(hyperz, "RADEON_HYPERZ", FALSE) /* Clear currently bound buffers. */ @@ -287,8 +307,44 @@ static void r300_clear(struct pipe_context* pipe, } } + /* Use fast color clear for an AA colorbuffer. + * The CMASK is shared between all colorbuffers, so we use it + * if there is only one colorbuffer bound. */ + if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs == 1 && + r300_resource(fb->cbufs[0]->texture)->tex.cmask_dwords) { + /* Try to obtain the access to the CMASK if we don't have one. */ + if (!r300->cmask_access) { + r300->cmask_access = + r300->rws->cs_request_feature(r300->cs, + RADEON_FID_R300_CMASK_ACCESS, + TRUE); + } + + /* Setup the clear. */ + if (r300->cmask_access) { + /* Pair the resource with the CMASK to avoid other resources + * accessing it. 
*/ + if (!r300->screen->cmask_resource) { + pipe_mutex_lock(r300->screen->cmask_mutex); + /* Double checking (first unlocked, then locked). */ + if (!r300->screen->cmask_resource) { + /* Don't reference this, so that the texture can be + * destroyed while set in cmask_resource. + * Then in texture_destroy, we set cmask_resource to NULL. */ + r300->screen->cmask_resource = fb->cbufs[0]->texture; + } + pipe_mutex_unlock(r300->screen->cmask_mutex); + } + + if (r300->screen->cmask_resource == fb->cbufs[0]->texture) { + r300_set_clear_color(r300, color); + r300_mark_atom_dirty(r300, &r300->cmask_clear); + buffers &= ~PIPE_CLEAR_COLOR; + } + } + } /* Enable CBZB clear. */ - if (r300_cbzb_clear_allowed(r300, buffers)) { + else if (r300_cbzb_clear_allowed(r300, buffers)) { struct r300_surface *surf = r300_surface(fb->cbufs[0]); hyperz->zb_depthclearvalue = @@ -312,13 +368,16 @@ static void r300_clear(struct pipe_context* pipe, fb->nr_cbufs, buffers, cformat, color, depth, stencil); r300_blitter_end(r300); - } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) { + } else if (r300->zmask_clear.dirty || + r300->hiz_clear.dirty || + r300->cmask_clear.dirty) { /* Just clear zmask and hiz now, this does not use the standard draw * procedure. */ /* Calculate zmask_clear and hiz_clear atom sizes. */ unsigned dwords = (r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) + (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) + + (r300->cmask_clear.dirty ? r300->cmask_clear.size : 0) + r300_get_num_cs_end_dwords(r300); /* Reserve CS space. 
*/ @@ -337,6 +396,11 @@ static void r300_clear(struct pipe_context* pipe, r300->hiz_clear.state); r300->hiz_clear.dirty = FALSE; } + if (r300->cmask_clear.dirty) { + r300_emit_cmask_clear(r300, r300->cmask_clear.size, + r300->cmask_clear.state); + r300->cmask_clear.dirty = FALSE; + } } else { assert(0); } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index f8b5d4e3d3e..996491e9431 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -30,6 +30,7 @@ #define RV530_HIZ_LIMIT 15360 /* rv3xx have only one pipe */ +#define PIPE_CMASK_SIZE 4096 #define PIPE_ZMASK_SIZE 4096 #define RV3xx_ZMASK_SIZE 5120 diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index c744fea10cf..a6fccc6e02c 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -75,6 +75,9 @@ static void r300_destroy_context(struct pipe_context* context) if (r300->cs && r300->hyperz_enabled) { r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE); } + if (r300->cs && r300->cmask_access) { + r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_CMASK_ACCESS, FALSE); + } if (r300->blitter) util_blitter_destroy(r300->blitter); @@ -203,6 +206,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) /* Clear commands */ R300_INIT_ATOM(hiz_clear, r300->screen->caps.hiz_ram > 0 ? 6 : 0); R300_INIT_ATOM(zmask_clear, r300->screen->caps.zmask_ram > 0 ? 6 : 0); + R300_INIT_ATOM(cmask_clear, 6); /* ZB (unpipelined), SU. */ R300_INIT_ATOM(query_start, 4); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 1b912c3eeee..33851519bd8 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -309,6 +309,7 @@ struct r300_surface { uint32_t pitch; /* COLORPITCH or DEPTHPITCH. 
*/ uint32_t pitch_zmask; /* ZMASK_PITCH */ uint32_t pitch_hiz; /* HIZ_PITCH */ + uint32_t pitch_cmask; /* CMASK_PITCH */ uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */ /* Parameters dedicated to the CBZB clear. */ @@ -380,6 +381,10 @@ struct r300_texture_desc { /* Zmask/HiZ strides for each miplevel. */ unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; + + /* CMASK info for AA buffers (no mipmapping). */ + unsigned cmask_dwords; + unsigned cmask_stride_in_pixels; }; struct r300_resource @@ -536,6 +541,8 @@ struct r300_context { struct r300_atom hiz_clear; /* zmask clear */ struct r300_atom zmask_clear; + /* cmask clear */ + struct r300_atom cmask_clear; /* Occlusion query. */ struct r300_atom query_start; @@ -616,6 +623,13 @@ struct r300_context { enum r300_hiz_func hiz_func; /* HiZ function. Can be either MIN or MAX. */ uint32_t hiz_clear_value; /* HiZ clear value. */ + /* CMASK state. */ + boolean cmask_access; + boolean cmask_in_use; + uint32_t color_clear_value; /* RGBA8 or RGBA1010102 */ + uint32_t color_clear_value_ar; /* RGBA16F */ + uint32_t color_clear_value_gb; /* RGBA16F */ + /* Compiler state. */ struct rc_regalloc_state fs_regalloc_state; /* Register allocator info for * fragment shaders. 
*/ @@ -722,7 +736,8 @@ void r300_blitter_draw_rectangle(struct blitter_context *blitter, enum r300_fb_state_change { R300_CHANGED_FB_STATE = 0, R300_CHANGED_HYPERZ_FLAG, - R300_CHANGED_MULTIWRITE + R300_CHANGED_MULTIWRITE, + R300_CHANGED_CMASK_ENABLE, }; void r300_mark_fb_state_dirty(struct r300_context *r300, diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 5ec2cf9e4ae..ff1f16d913a 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -50,6 +50,7 @@ static const struct debug_named_value debug_options[] = { { "nocbzb", DBG_NO_CBZB, "Disable fast color clear" }, { "nozmask", DBG_NO_ZMASK, "Disable zbuffer compression" }, { "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" }, + { "nocmask", DBG_NO_CMASK, "Disable AA compression and fast AA clear" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index cb6c46e5e6c..9ea084fac2d 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -399,14 +399,17 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) BEGIN_CS(size); - /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not - * what we usually want. */ if (r300->screen->caps.is_r500) { rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE; } + /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers. 
*/ if (fb->nr_cbufs && r300->fb_multiwrite) { rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs); } + if (r300->cmask_in_use) { + rb3d_cctl |= R300_RB3D_CCTL_AA_COMPRESSION_ENABLE | + R300_RB3D_CCTL_CMASK_ENABLE; + } OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl); @@ -419,6 +422,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch); OUT_CS_RELOC(surf); + + if (r300->cmask_in_use && i == 0) { + OUT_CS_REG(R300_RB3D_CMASK_OFFSET0, 0); + OUT_CS_REG(R300_RB3D_CMASK_PITCH0, surf->pitch_cmask); + OUT_CS_REG(R300_RB3D_COLOR_CLEAR_VALUE, r300->color_clear_value); + } } /* Set up the ZB part of the CBZB clear. */ @@ -1240,6 +1249,30 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state r300_mark_atom_dirty(r300, &r300->hyperz_state); } +void r300_emit_cmask_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_resource *tex; + CS_LOCALS(r300); + + tex = r300_resource(fb->cbufs[0]->texture); + + BEGIN_CS(size); + OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_CMASK, 2); + OUT_CS(0); + OUT_CS(tex->tex.cmask_dwords); + OUT_CS(0); + END_CS; + + /* Mark the current zbuffer's zmask as in use. 
*/ + r300->cmask_in_use = TRUE; + r300_mark_fb_state_dirty(r300, R300_CHANGED_CMASK_ENABLE); +} + void r300_emit_ztop_state(struct r300_context* r300, unsigned size, void* state) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index a58ab857f56..eaa0a6c4ac0 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -117,6 +117,7 @@ void r300_emit_invariant_state(struct r300_context *r300, void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state); void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state); +void r300_emit_cmask_clear(struct r300_context *r300, unsigned size, void *state); unsigned r300_get_num_dirty_dwords(struct r300_context *r300); unsigned r300_get_num_cs_end_dwords(struct r300_context *r300); diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 5e1d8101910..8342ef532d3 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2390,7 +2390,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Program this register with a 32-bit value in ARGB8888 or ARGB2101010 * formats, ignoring the fields. */ -#define RB3D_COLOR_CLEAR_VALUE 0x4e14 +#define R300_RB3D_COLOR_CLEAR_VALUE 0x4E14 +/* For FP16 AA. */ +#define R500_RB3D_COLOR_CLEAR_VALUE_AR 0x46C0 +#define R500_RB3D_COLOR_CLEAR_VALUE_GB 0x46C4 /* gap */ @@ -2485,6 +2488,18 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT (2 << 2) /* reserved */ +#define R300_RB3D_CMASK_OFFSET0 0x4E54 +#define R300_RB3D_CMASK_OFFSET1 0x4E58 +#define R300_RB3D_CMASK_OFFSET2 0x4E5C +#define R300_RB3D_CMASK_OFFSET3 0x4E60 +#define R300_RB3D_CMASK_PITCH0 0x4E64 +#define R300_RB3D_CMASK_PITCH1 0x4E68 +#define R300_RB3D_CMASK_PITCH2 0x4E6C +#define R300_RB3D_CMASK_PITCH3 0x4E70 +#define R300_RB3D_CMASK_WRINDEX 0x4E74 +#define R300_RB3D_CMASK_DWORD 0x4E78 +#define R300_RB3D_CMASK_RDINDEX 0x4E7C + /* Resolve buffer destination address. The cache must be empty before changing * this register if the cb is in resolve mode. Unpipelined */ @@ -3504,6 +3519,7 @@ enum { * 2. CLEAR_VALUE: Value to write into HIZ RAM. */ #define R300_PACKET3_3D_CLEAR_HIZ 0x00003700 +#define R300_PACKET3_3D_CLEAR_CMASK 0x00003800 /* Draws a set of primitives using vertex buffers pointed by the state data. * At least 2 Parameters: diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 3af5774ea51..d0f00700f81 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -522,6 +522,8 @@ static void r300_destroy_screen(struct pipe_screen* pscreen) struct r300_screen* r300screen = r300_screen(pscreen); struct radeon_winsys *rws = radeon_winsys(pscreen); + pipe_mutex_destroy(r300screen->cmask_mutex); + if (rws) rws->destroy(rws); @@ -612,6 +614,7 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws) r300_init_screen_resource_functions(r300screen); util_format_s3tc_init(); + pipe_mutex_init(r300screen->cmask_mutex); return &r300screen->screen; } diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index d2bed8d528e..e129cee57c7 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -28,6 +28,7 @@ #include "../../winsys/radeon/drm/radeon_winsys.h" #include "pipe/p_screen.h" #include "util/u_slab.h" +#include 
"os/os_thread.h" #include struct r300_screen { @@ -42,6 +43,10 @@ struct r300_screen { /** Combination of DBG_xxx flags */ unsigned debug; + + /* The MSAA texture with CMASK access; */ + struct pipe_resource *cmask_resource; + pipe_mutex cmask_mutex; }; @@ -92,6 +97,7 @@ radeon_winsys(struct pipe_screen *screen) { #define DBG_NO_CBZB (1 << 20) #define DBG_NO_ZMASK (1 << 21) #define DBG_NO_HIZ (1 << 22) +#define DBG_NO_CMASK (1 << 23) /* Statistics. */ #define DBG_P_STAT (1 << 25) /*@}*/ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 4a5a5a89b84..fa256aa2c55 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -825,6 +825,10 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, r300->fb_state.size += 8; } + if (r300->cmask_in_use) { + r300->fb_state.size += 6; + } + /* The size of the rest of atoms stays the same. */ } @@ -900,6 +904,11 @@ r300_set_framebuffer_state(struct pipe_context* pipe, } assert(state->zsbuf || (r300->locked_zbuffer && !unlock_zbuffer) || !r300->zmask_in_use); + /* Set whether CMASK can be used. */ + r300->cmask_in_use = + state->nr_cbufs == 1 && + r300->screen->cmask_resource == state->cbufs[0]->texture; + /* Need to reset clamping or colormask. 
*/ r300_mark_atom_dirty(r300, &r300->blend_state); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 4be6d5067d2..6816fd01ab3 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -906,14 +906,23 @@ static void r300_texture_setup_fb_state(struct r300_surface *surf) surf->format = r300_translate_out_fmt(surf->base.format); surf->colormask_swizzle = r300_translate_colormask_swizzle(surf->base.format); + surf->pitch_cmask = tex->tex.cmask_stride_in_pixels; } } static void r300_texture_destroy(struct pipe_screen *screen, struct pipe_resource* texture) { + struct r300_screen *rscreen = r300_screen(screen); struct r300_resource* tex = (struct r300_resource*)texture; + if (tex->tex.cmask_dwords) { + pipe_mutex_lock(rscreen->cmask_mutex); + if (texture == rscreen->cmask_resource) { + rscreen->cmask_resource = NULL; + } + pipe_mutex_unlock(rscreen->cmask_mutex); + } pb_reference(&tex->buf, NULL); FREE(tex); } diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 8928f73f7cf..46f07e2c522 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -409,6 +409,48 @@ static void r300_setup_hyperz_properties(struct r300_screen *screen, } } +static void r300_setup_cmask_properties(struct r300_screen *screen, + struct r300_resource *tex) +{ + static unsigned cmask_align_x[4] = {16, 32, 48, 32}; + static unsigned cmask_align_y[4] = {16, 16, 16, 32}; + unsigned pipes, stride, cmask_num_dw; + + /* We need an AA colorbuffer, no mipmaps. */ + if (tex->b.b.nr_samples <= 1 || + tex->b.b.last_level > 0 || + util_format_is_depth_or_stencil(tex->b.b.format)) { + return; + } + + if (tex->b.b.format == PIPE_FORMAT_R16G16B16A16_FLOAT) { + return; + } + + if (SCREEN_DBG_ON(screen, DBG_NO_CMASK)) { + return; + } + + /* CMASK is part of raster pipes. The number of Z pipes doesn't matter. 
*/ + pipes = screen->info.r300_num_gb_pipes; + + stride = r300_stride_to_width(tex->b.b.format, + tex->tex.stride_in_bytes[0]); + stride = align(stride, 16); + + /* Get the CMASK size in dwords. */ + cmask_num_dw = r300_pixels_to_dwords(stride, tex->b.b.height0, + cmask_align_x[pipes-1], + cmask_align_y[pipes-1]); + + /* Check the CMASK size against the CMASK memory limit. */ + if (cmask_num_dw <= PIPE_CMASK_SIZE * pipes) { + tex->tex.cmask_dwords = cmask_num_dw; + tex->tex.cmask_stride_in_pixels = + util_align_npot(stride, cmask_align_x[pipes-1]); + } +} + static void r300_setup_tiling(struct r300_screen *screen, struct r300_resource *tex) { @@ -532,6 +574,7 @@ void r300_texture_desc_init(struct r300_screen *rscreen, } r300_setup_hyperz_properties(rscreen, tex); + r300_setup_cmask_properties(rscreen, tex); if (SCREEN_DBG_ON(rscreen, DBG_TEX)) r300_tex_print_info(tex, "texture_desc_init");