These are optimizations which make MSAA a lot faster.
The MSAA work is complete with this commit, except for enabling the AA
optimizations for RGBA16F, for which a patch is ready and waiting until
the kernel CS checker fix lands.
MSAA can't be made any faster as far as hw programming is concerned.
The catch is that only one process and one colorbuffer can use the
optimizations at a time. There is usually only one MSAA colorbuffer, so it
shouldn't be an issue.
Also, there is a limit on the resolution of the MSAA colorbuffer, expressed
in megapixels. If the limit is exceeded, the AA optimizations are disabled.
The limit is:
- 1 Mpix on low-end and some mid-level chipsets (1024x768 and 1280x720)
- 2 Mpix on some mid-level chipsets (1600x1200 and 1920x1080)
- 3 or 4 Mpix on high-end chipsets (2048x1536 or 2560x1600, respectively)
It corresponds to the number of raster pipes (= GB pipes) available; each pipe
can hold 1 Mpix of AA compression data.
If it's enabled, the driver prints to stdout:
radeon: Acquired access to AA optimizations.
#include "r300_reg.h"
#include "util/u_format.h"
+#include "util/u_half.h"
#include "util/u_pack_color.h"
#include "util/u_surface.h"
return r | (r << 8) | (r << 16) | (r << 24);
}
+static void r300_set_clear_color(struct r300_context *r300,
+ const union pipe_color_union *color)
+{ /* Pack 'color' for the format of the first bound colorbuffer and store
+   * the packed value in r300 as the color clear value. */
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ union util_color uc;
+
+ memset(&uc, 0, sizeof(uc)); /* start from all-zero bits; presumably because
+   * util_pack_color may fill only part of the union - TODO confirm */
+ util_pack_color(color->f, fb->cbufs[0]->format, &uc);
+
+ if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) {
+ /* (0,1,2,3) maps to (B,G,R,A): two half-floats are combined per value,
+  * with G in the upper 16 bits above B for _gb, and A in the upper
+  * 16 bits above R for _ar. */
+ r300->color_clear_value_gb = uc.h[0] | ((uint32_t)uc.h[1] << 16);
+ r300->color_clear_value_ar = uc.h[2] | ((uint32_t)uc.h[3] << 16);
+ } else { /* every other format: keep the packed 32-bit value as is */
+ r300->color_clear_value = uc.ui;
+ }
+}
+
DEBUG_GET_ONCE_BOOL_OPTION(hyperz, "RADEON_HYPERZ", FALSE)
/* Clear currently bound buffers. */
}
}
+ /* Use fast color clear for an AA colorbuffer.
+ * The CMASK is shared between all colorbuffers, so we use it
+ * if there is only one colorbuffer bound. */
+ if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs == 1 &&
+ r300_resource(fb->cbufs[0]->texture)->tex.cmask_dwords) {
+ /* Try to obtain the access to the CMASK if we don't have one. */
+ if (!r300->cmask_access) {
+ r300->cmask_access =
+ r300->rws->cs_request_feature(r300->cs,
+ RADEON_FID_R300_CMASK_ACCESS,
+ TRUE);
+ }
+
+ /* Setup the clear. */
+ if (r300->cmask_access) {
+ /* Pair the resource with the CMASK to avoid other resources
+ * accessing it. */
+ if (!r300->screen->cmask_resource) {
+ pipe_mutex_lock(r300->screen->cmask_mutex);
+ /* Double checking (first unlocked, then locked). */
+ if (!r300->screen->cmask_resource) {
+ /* Don't reference this, so that the texture can be
+ * destroyed while set in cmask_resource.
+ * Then in texture_destroy, we set cmask_resource to NULL. */
+ r300->screen->cmask_resource = fb->cbufs[0]->texture;
+ }
+ pipe_mutex_unlock(r300->screen->cmask_mutex);
+ }
+
+ if (r300->screen->cmask_resource == fb->cbufs[0]->texture) {
+ r300_set_clear_color(r300, color);
+ r300_mark_atom_dirty(r300, &r300->cmask_clear);
+ buffers &= ~PIPE_CLEAR_COLOR;
+ }
+ }
+ }
/* Enable CBZB clear. */
- if (r300_cbzb_clear_allowed(r300, buffers)) {
+ else if (r300_cbzb_clear_allowed(r300, buffers)) {
struct r300_surface *surf = r300_surface(fb->cbufs[0]);
hyperz->zb_depthclearvalue =
fb->nr_cbufs,
buffers, cformat, color, depth, stencil);
r300_blitter_end(r300);
- } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) {
+ } else if (r300->zmask_clear.dirty ||
+ r300->hiz_clear.dirty ||
+ r300->cmask_clear.dirty) {
/* Just clear zmask and hiz now, this does not use the standard draw
* procedure. */
/* Calculate zmask_clear and hiz_clear atom sizes. */
unsigned dwords =
(r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) +
(r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) +
+ (r300->cmask_clear.dirty ? r300->cmask_clear.size : 0) +
r300_get_num_cs_end_dwords(r300);
/* Reserve CS space. */
r300->hiz_clear.state);
r300->hiz_clear.dirty = FALSE;
}
+ if (r300->cmask_clear.dirty) {
+ r300_emit_cmask_clear(r300, r300->cmask_clear.size,
+ r300->cmask_clear.state);
+ r300->cmask_clear.dirty = FALSE;
+ }
} else {
assert(0);
}
#define RV530_HIZ_LIMIT 15360
/* rv3xx have only one pipe */
+#define PIPE_CMASK_SIZE 4096
#define PIPE_ZMASK_SIZE 4096
#define RV3xx_ZMASK_SIZE 5120
if (r300->cs && r300->hyperz_enabled) {
r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE);
}
+ if (r300->cs && r300->cmask_access) {
+ r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_CMASK_ACCESS, FALSE);
+ }
if (r300->blitter)
util_blitter_destroy(r300->blitter);
/* Clear commands */
R300_INIT_ATOM(hiz_clear, r300->screen->caps.hiz_ram > 0 ? 6 : 0);
R300_INIT_ATOM(zmask_clear, r300->screen->caps.zmask_ram > 0 ? 6 : 0);
+ R300_INIT_ATOM(cmask_clear, 6);
/* ZB (unpipelined), SU. */
R300_INIT_ATOM(query_start, 4);
uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
uint32_t pitch_zmask; /* ZMASK_PITCH */
uint32_t pitch_hiz; /* HIZ_PITCH */
+ uint32_t pitch_cmask; /* CMASK_PITCH */
uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */
/* Parameters dedicated to the CBZB clear. */
/* Zmask/HiZ strides for each miplevel. */
unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS];
+
+ /* CMASK info for AA buffers (no mipmapping). */
+ unsigned cmask_dwords;
+ unsigned cmask_stride_in_pixels;
};
struct r300_resource
struct r300_atom hiz_clear;
/* zmask clear */
struct r300_atom zmask_clear;
+ /* cmask clear */
+ struct r300_atom cmask_clear;
/* Occlusion query. */
struct r300_atom query_start;
enum r300_hiz_func hiz_func; /* HiZ function. Can be either MIN or MAX. */
uint32_t hiz_clear_value; /* HiZ clear value. */
+ /* CMASK state. */
+ boolean cmask_access;
+ boolean cmask_in_use;
+ uint32_t color_clear_value; /* RGBA8 or RGBA1010102 */
+ uint32_t color_clear_value_ar; /* RGBA16F */
+ uint32_t color_clear_value_gb; /* RGBA16F */
+
/* Compiler state. */
struct rc_regalloc_state fs_regalloc_state; /* Register allocator info for
* fragment shaders. */
enum r300_fb_state_change {
R300_CHANGED_FB_STATE = 0,
R300_CHANGED_HYPERZ_FLAG,
- R300_CHANGED_MULTIWRITE
+ R300_CHANGED_MULTIWRITE,
+ R300_CHANGED_CMASK_ENABLE,
};
void r300_mark_fb_state_dirty(struct r300_context *r300,
{ "nocbzb", DBG_NO_CBZB, "Disable fast color clear" },
{ "nozmask", DBG_NO_ZMASK, "Disable zbuffer compression" },
{ "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" },
+ { "nocmask", DBG_NO_CMASK, "Disable AA compression and fast AA clear" },
/* must be last */
DEBUG_NAMED_VALUE_END
BEGIN_CS(size);
- /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not
- * what we usually want. */
if (r300->screen->caps.is_r500) {
rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE;
}
+ /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers. */
if (fb->nr_cbufs && r300->fb_multiwrite) {
rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs);
}
+ if (r300->cmask_in_use) {
+ rb3d_cctl |= R300_RB3D_CCTL_AA_COMPRESSION_ENABLE |
+ R300_RB3D_CCTL_CMASK_ENABLE;
+ }
OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl);
OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch);
OUT_CS_RELOC(surf);
+
+ if (r300->cmask_in_use && i == 0) {
+ OUT_CS_REG(R300_RB3D_CMASK_OFFSET0, 0);
+ OUT_CS_REG(R300_RB3D_CMASK_PITCH0, surf->pitch_cmask);
+ OUT_CS_REG(R300_RB3D_COLOR_CLEAR_VALUE, r300->color_clear_value);
+ }
}
/* Set up the ZB part of the CBZB clear. */
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
+void r300_emit_cmask_clear(struct r300_context *r300, unsigned size, void *state)
+{ /* Emit the CLEAR_CMASK packet for the first bound colorbuffer, then flag
+   * CMASK as in use and mark the framebuffer state dirty.
+   * 'size' is the number of dwords reserved for this emission; 'state' is
+   * not used by this function. */
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_resource *tex;
+ CS_LOCALS(r300);
+
+ tex = r300_resource(fb->cbufs[0]->texture);
+
+ BEGIN_CS(size);
+ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); /* going by the flag
+   * names: free the 3D tags and flush dirty data of the destination cache
+   * before the clear packet */
+ OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_CMASK, 2); /* NOTE(review): the three
+   * payload dwords below look like start offset, dword count and fill
+   * value - confirm against the packet documentation */
+ OUT_CS(0);
+ OUT_CS(tex->tex.cmask_dwords); /* CMASK size of this texture, in dwords */
+ OUT_CS(0);
+ END_CS;
+
+ /* Mark CMASK as in use and dirty the framebuffer state, whose emission
+  * checks cmask_in_use. */
+ r300->cmask_in_use = TRUE;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_CMASK_ENABLE);
+}
+
void r300_emit_ztop_state(struct r300_context* r300,
unsigned size, void* state)
{
void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state);
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state);
+void r300_emit_cmask_clear(struct r300_context *r300, unsigned size, void *state);
unsigned r300_get_num_dirty_dwords(struct r300_context *r300);
unsigned r300_get_num_cs_end_dwords(struct r300_context *r300);
* Program this register with a 32-bit value in ARGB8888 or ARGB2101010
* formats, ignoring the fields.
*/
-#define RB3D_COLOR_CLEAR_VALUE 0x4e14
+#define R300_RB3D_COLOR_CLEAR_VALUE 0x4E14
+/* For FP16 AA. */
+#define R500_RB3D_COLOR_CLEAR_VALUE_AR 0x46C0
+#define R500_RB3D_COLOR_CLEAR_VALUE_GB 0x46C4
/* gap */
# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT (2 << 2)
/* reserved */
+#define R300_RB3D_CMASK_OFFSET0 0x4E54
+#define R300_RB3D_CMASK_OFFSET1 0x4E58
+#define R300_RB3D_CMASK_OFFSET2 0x4E5C
+#define R300_RB3D_CMASK_OFFSET3 0x4E60
+#define R300_RB3D_CMASK_PITCH0 0x4E64
+#define R300_RB3D_CMASK_PITCH1 0x4E68
+#define R300_RB3D_CMASK_PITCH2 0x4E6C
+#define R300_RB3D_CMASK_PITCH3 0x4E70
+#define R300_RB3D_CMASK_WRINDEX 0x4E74
+#define R300_RB3D_CMASK_DWORD 0x4E78
+#define R300_RB3D_CMASK_RDINDEX 0x4E7C
+
/* Resolve buffer destination address. The cache must be empty before changing
* this register if the cb is in resolve mode. Unpipelined
*/
* 2. CLEAR_VALUE: Value to write into HIZ RAM.
*/
#define R300_PACKET3_3D_CLEAR_HIZ 0x00003700
+#define R300_PACKET3_3D_CLEAR_CMASK 0x00003800
/* Draws a set of primitives using vertex buffers pointed by the state data.
* At least 2 Parameters:
struct r300_screen* r300screen = r300_screen(pscreen);
struct radeon_winsys *rws = radeon_winsys(pscreen);
+ pipe_mutex_destroy(r300screen->cmask_mutex);
+
if (rws)
rws->destroy(rws);
r300_init_screen_resource_functions(r300screen);
util_format_s3tc_init();
+ pipe_mutex_init(r300screen->cmask_mutex);
return &r300screen->screen;
}
#include "../../winsys/radeon/drm/radeon_winsys.h"
#include "pipe/p_screen.h"
#include "util/u_slab.h"
+#include "os/os_thread.h"
#include <stdio.h>
struct r300_screen {
/** Combination of DBG_xxx flags */
unsigned debug;
+
+ /* The MSAA texture with CMASK access (no reference is held on it). */
+ struct pipe_resource *cmask_resource;
+ pipe_mutex cmask_mutex;
};
#define DBG_NO_CBZB (1 << 20)
#define DBG_NO_ZMASK (1 << 21)
#define DBG_NO_HIZ (1 << 22)
+#define DBG_NO_CMASK (1 << 23)
/* Statistics. */
#define DBG_P_STAT (1 << 25)
/*@}*/
r300->fb_state.size += 8;
}
+ if (r300->cmask_in_use) {
+ r300->fb_state.size += 6;
+ }
+
/* The size of the rest of atoms stays the same. */
}
}
assert(state->zsbuf || (r300->locked_zbuffer && !unlock_zbuffer) || !r300->zmask_in_use);
+ /* Set whether CMASK can be used. */
+ r300->cmask_in_use =
+ state->nr_cbufs == 1 &&
+ r300->screen->cmask_resource == state->cbufs[0]->texture;
+
/* Need to reset clamping or colormask. */
r300_mark_atom_dirty(r300, &r300->blend_state);
surf->format = r300_translate_out_fmt(surf->base.format);
surf->colormask_swizzle =
r300_translate_colormask_swizzle(surf->base.format);
+ surf->pitch_cmask = tex->tex.cmask_stride_in_pixels;
}
}
static void r300_texture_destroy(struct pipe_screen *screen,
struct pipe_resource* texture)
{
+ struct r300_screen *rscreen = r300_screen(screen);
struct r300_resource* tex = (struct r300_resource*)texture;
+ if (tex->tex.cmask_dwords) { /* Only a texture with CMASK data can have
+   * been stored as the screen's cmask_resource. That pointer holds no
+   * reference on the texture, so reset it under the mutex before the
+   * texture is freed below. */
+ pipe_mutex_lock(rscreen->cmask_mutex);
+ if (texture == rscreen->cmask_resource) {
+ rscreen->cmask_resource = NULL;
+ }
+ pipe_mutex_unlock(rscreen->cmask_mutex);
+ }
pb_reference(&tex->buf, NULL);
FREE(tex);
}
}
}
+static void r300_setup_cmask_properties(struct r300_screen *screen,
+ struct r300_resource *tex)
+{ /* Decide whether 'tex' may use CMASK and, if so, record its CMASK size
+   * (cmask_dwords) and CMASK stride (cmask_stride_in_pixels). On every
+   * early return, and when the size limit is exceeded, both fields are
+   * left as they were. */
+ static unsigned cmask_align_x[4] = {16, 32, 48, 32}; /* indexed by pipes-1 */
+ static unsigned cmask_align_y[4] = {16, 16, 16, 32}; /* indexed by pipes-1 */
+ unsigned pipes, stride, cmask_num_dw;
+
+ /* We need an AA colorbuffer, no mipmaps. */
+ if (tex->b.b.nr_samples <= 1 ||
+ tex->b.b.last_level > 0 ||
+ util_format_is_depth_or_stencil(tex->b.b.format)) {
+ return;
+ }
+
+ if (tex->b.b.format == PIPE_FORMAT_R16G16B16A16_FLOAT) { /* RGBA16F gets
+   * no CMASK for now */
+ return;
+ }
+
+ if (SCREEN_DBG_ON(screen, DBG_NO_CMASK)) { /* turned off by a debug flag */
+ return;
+ }
+
+ /* CMASK is part of raster pipes. The number of Z pipes doesn't matter.
+  * NOTE(review): 'pipes' indexes the 4-entry alignment tables as pipes-1
+  * with no range check; this assumes 1 <= pipes <= 4 - confirm. */
+ pipes = screen->info.r300_num_gb_pipes;
+
+ stride = r300_stride_to_width(tex->b.b.format,
+ tex->tex.stride_in_bytes[0]);
+ stride = align(stride, 16);
+
+ /* Get the CMASK size in dwords. */
+ cmask_num_dw = r300_pixels_to_dwords(stride, tex->b.b.height0,
+ cmask_align_x[pipes-1],
+ cmask_align_y[pipes-1]);
+
+ /* Check the CMASK size against the CMASK memory limit, which is
+  * PIPE_CMASK_SIZE dwords per pipe. A texture that does not fit gets no
+  * CMASK properties assigned here. */
+ if (cmask_num_dw <= PIPE_CMASK_SIZE * pipes) {
+ tex->tex.cmask_dwords = cmask_num_dw;
+ tex->tex.cmask_stride_in_pixels =
+ util_align_npot(stride, cmask_align_x[pipes-1]);
+ }
+}
+
static void r300_setup_tiling(struct r300_screen *screen,
struct r300_resource *tex)
{
}
r300_setup_hyperz_properties(rscreen, tex);
+ r300_setup_cmask_properties(rscreen, tex);
if (SCREEN_DBG_ON(rscreen, DBG_TEX))
r300_tex_print_info(tex, "texture_desc_init");