From: Marek Olšák Date: Sat, 5 Jan 2013 05:21:49 +0000 (+0100) Subject: r300g: implement MSAA X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8ed6b1400bc8a78f46340f41aaf2e88b24c23267;p=mesa.git r300g: implement MSAA This is not as optimized as r600g - the MSAA compression is missing, so r300g needs a lot of bandwidth (more than r600g to do the same thing). However, if the bandwidth is not an issue for you, you can enjoy this unoptimized MSAA support. The only other missing optimization for MSAA is the fast color clear. MSAA is enabled on r500 only, because that's the only GPU family I tested. That said, MSAA should work on r300 and r400 as well (but you must set RADEON_MSAA=1 to allow it, then turn MSAA on in your app or set GALLIUM_MSAA=n, n >= 2, n <= 6) I will enable the support by default on r300-r400 once someone (other than me) tests those chipsets with piglit. The supported modes are 2x, 4x, 6x. The supported MSAA formats are RGBA8, BGRA8, and RGBA16F (r500 only). Those 3 formats are used for all GL internal formats. Tested with piglit. (I have ported all MSAA tests to GL2.1) --- diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 1b9a3f438ac..95224020c79 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -1816,7 +1816,8 @@ void util_blitter_custom_color(struct blitter_context *blitter, blitter_disable_render_cond(ctx); /* bind states */ - pipe->bind_blend_state(pipe, custom_blend); + pipe->bind_blend_state(pipe, custom_blend ? 
custom_blend + : ctx->blend[PIPE_MASK_RGBA]); pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); ctx->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1, FALSE)); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 9fff3700ed3..46578318af2 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -23,6 +23,7 @@ #include "r300_context.h" #include "r300_emit.h" #include "r300_texture.h" +#include "r300_reg.h" #include "util/u_format.h" #include "util/u_pack_color.h" @@ -66,6 +67,7 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state); util_blitter_save_viewport(r300->blitter, &r300->viewport); util_blitter_save_scissor(r300->blitter, r300->scissor_state.state); + util_blitter_save_sample_mask(r300->blitter, *(unsigned*)r300->sample_mask.state); util_blitter_save_vertex_buffer_slot(r300->blitter, r300->vertex_buffer); util_blitter_save_vertex_elements(r300->blitter, r300->velems); @@ -478,6 +480,11 @@ static void r300_resource_copy_region(struct pipe_context *pipe, return; } + /* Can't read MSAA textures. */ + if (src->nr_samples > 1 || dst->nr_samples > 1) { + return; + } + /* The code below changes the texture format so that the copy can be done * on hardware. E.g. depth-stencil surfaces are copied as RGBA * colorbuffers. 
*/ @@ -595,20 +602,141 @@ static void r300_resource_copy_region(struct pipe_context *pipe, pipe_sampler_view_reference(&src_view, NULL); } +static boolean r300_is_simple_msaa_resolve(const struct pipe_blit_info *info) +{ + unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level); + unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level); + + return info->dst.resource->format == info->src.resource->format && + info->dst.resource->format == info->dst.format && + info->src.resource->format == info->src.format && + !info->scissor_enable && + info->mask == PIPE_MASK_RGBA && + dst_width == info->src.resource->width0 && + dst_height == info->src.resource->height0 && + info->dst.box.x == 0 && + info->dst.box.y == 0 && + info->dst.box.width == dst_width && + info->dst.box.height == dst_height && + info->src.box.x == 0 && + info->src.box.y == 0 && + info->src.box.width == dst_width && + info->src.box.height == dst_height; +} + +static void r300_simple_msaa_resolve(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dst_layer, + struct pipe_resource *src, + enum pipe_format format) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_surface *srcsurf, *dstsurf; + struct pipe_surface surf_tmpl; + struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; + + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + surf_tmpl.format = format; + srcsurf = r300_surface(pipe->create_surface(pipe, src, &surf_tmpl)); + + surf_tmpl.format = format; + surf_tmpl.u.tex.level = dst_level; + surf_tmpl.u.tex.first_layer = + surf_tmpl.u.tex.last_layer = dst_layer; + dstsurf = r300_surface(pipe->create_surface(pipe, dst, &surf_tmpl)); + + /* COLORPITCH should contain the tiling info of the resolve buffer. + * The tiling of the AA buffer isn't programmable anyway. 
*/ + srcsurf->pitch &= ~(R300_COLOR_TILE(1) | R300_COLOR_MICROTILE(3)); + srcsurf->pitch |= dstsurf->pitch & (R300_COLOR_TILE(1) | R300_COLOR_MICROTILE(3)); + + /* Enable AA resolve. */ + aa->dest = dstsurf; + r300->aa_state.size = 8; + r300_mark_atom_dirty(r300, &r300->aa_state); + + /* Resolve the surface. */ + r300_blitter_begin(r300, R300_CLEAR_SURFACE); + util_blitter_custom_color(r300->blitter, &srcsurf->base, NULL); + r300_blitter_end(r300); + + /* Disable AA resolve. */ + aa->dest = NULL; + r300->aa_state.size = 4; + r300_mark_atom_dirty(r300, &r300->aa_state); + + pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL); + pipe_surface_reference((struct pipe_surface**)&dstsurf, NULL); +} + +static void r300_msaa_resolve(struct pipe_context *pipe, + const struct pipe_blit_info *info) +{ + struct r300_context *r300 = r300_context(pipe); + struct pipe_screen *screen = pipe->screen; + struct pipe_resource *tmp, templ; + struct pipe_blit_info blit; + + assert(info->src.level == 0); + assert(info->src.box.z == 0); + assert(info->src.box.depth == 1); + assert(info->dst.box.depth == 1); + + if (r300_is_simple_msaa_resolve(info)) { + r300_simple_msaa_resolve(pipe, info->dst.resource, info->dst.level, + info->dst.box.z, info->src.resource, + info->src.format); + return; + } + + /* resolve into a temporary texture, then blit */ + memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_TEXTURE_2D; + templ.format = info->src.resource->format; + templ.width0 = info->src.resource->width0; + templ.height0 = info->src.resource->height0; + templ.depth0 = 1; + templ.array_size = 1; + templ.usage = PIPE_USAGE_STATIC; + + tmp = screen->resource_create(screen, &templ); + + /* resolve */ + r300_simple_msaa_resolve(pipe, tmp, 0, 0, info->src.resource, + info->src.format); + + /* blit */ + blit = *info; + blit.src.resource = tmp; + blit.src.box.z = 0; + + r300_blitter_begin(r300, R300_BLIT); + util_blitter_blit(r300->blitter, &blit); + r300_blitter_end(r300); + + 
pipe_resource_reference(&tmp, NULL); +} + static void r300_blit(struct pipe_context *pipe, - const struct pipe_blit_info *blit_info) + const struct pipe_blit_info *blit) { struct r300_context *r300 = r300_context(pipe); struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct pipe_blit_info info = *blit_info; + struct pipe_blit_info info = *blit; - /* Decompress ZMASK. */ - if (r300->zmask_in_use && !r300->locked_zbuffer) { - if (fb->zsbuf->texture == info.src.resource || - fb->zsbuf->texture == info.dst.resource) { - r300_decompress_zmask(r300); - } + /* MSAA resolve. */ + if (info.src.resource->nr_samples > 1 && + info.dst.resource->nr_samples <= 1 && + !util_format_is_depth_or_stencil(info.src.resource->format)) { + r300_msaa_resolve(pipe, &info); + return; + } + + /* Can't read MSAA textures. */ + if (info.src.resource->nr_samples > 1) { + return; } /* Blit a combined depth-stencil resource as color. @@ -616,12 +744,29 @@ static void r300_blit(struct pipe_context *pipe, if ((info.mask & PIPE_MASK_S) && info.src.format == PIPE_FORMAT_S8_UINT_Z24_UNORM && info.dst.format == PIPE_FORMAT_S8_UINT_Z24_UNORM) { - info.src.format = PIPE_FORMAT_B8G8R8A8_UNORM; - info.dst.format = PIPE_FORMAT_B8G8R8A8_UNORM; - if (info.mask & PIPE_MASK_Z) { - info.mask = PIPE_MASK_RGBA; /* depth+stencil */ + if (info.dst.resource->nr_samples > 1) { + /* Cannot do that with MSAA buffers. */ + info.mask &= ~PIPE_MASK_S; + if (!(info.mask & PIPE_MASK_Z)) { + return; + } } else { - info.mask = PIPE_MASK_B; /* stencil only */ + /* Single-sample buffer. */ + info.src.format = PIPE_FORMAT_B8G8R8A8_UNORM; + info.dst.format = PIPE_FORMAT_B8G8R8A8_UNORM; + if (info.mask & PIPE_MASK_Z) { + info.mask = PIPE_MASK_RGBA; /* depth+stencil */ + } else { + info.mask = PIPE_MASK_B; /* stencil only */ + } + } + } + + /* Decompress ZMASK. 
*/ + if (r300->zmask_in_use && !r300->locked_zbuffer) { + if (fb->zsbuf->texture == info.src.resource || + fb->zsbuf->texture == info.dst.resource) { + r300_decompress_zmask(r300); } } diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 8178c394d07..b498454561d 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -105,6 +105,7 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->hyperz_state.state); FREE(r300->invariant_state.state); FREE(r300->rs_block_state.state); + FREE(r300->sample_mask.state); FREE(r300->scissor_state.state); FREE(r300->textures_state.state); FREE(r300->vap_invariant_state.state); @@ -175,9 +176,10 @@ static boolean r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(blend_state, 8); R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2); /* SC. */ + R300_INIT_ATOM(sample_mask, 2); R300_INIT_ATOM(scissor_state, 3); /* GB, FG, GA, SU, SC, RB3D. */ - R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0)); + R300_INIT_ATOM(invariant_state, 14 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0)); /* VAP. */ R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(pvs_flush, 2); @@ -224,6 +226,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) R300_ALLOC_ATOM(ztop_state, r300_ztop_state); R300_ALLOC_ATOM(fb_state, pipe_framebuffer_state); R300_ALLOC_ATOM(gpu_flush, pipe_framebuffer_state); + r300->sample_mask.state = malloc(4); R300_ALLOC_ATOM(scissor_state, pipe_scissor_state); R300_ALLOC_ATOM(rs_block_state, r300_rs_block); R300_ALLOC_ATOM(fs_constants, r300_constant_buffer); @@ -270,6 +273,7 @@ static void r300_init_states(struct pipe_context *pipe) pipe->set_blend_color(pipe, &bc); pipe->set_clip_state(pipe, &cs); pipe->set_scissor_state(pipe, &ss); + pipe->set_sample_mask(pipe, ~0); /* Initialize the GPU flush. 
*/ { @@ -317,7 +321,6 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0); OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525); - OUT_CB_REG(R300_SC_SCREENDOOR, 0xffffff); if (r300->screen->caps.is_rv350) { OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index cddd91d7ab8..1b912c3eeee 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -73,7 +73,6 @@ struct r300_aa_state { struct r300_surface *dest; uint32_t aa_config; - uint32_t aaresolve_ctl; }; struct r300_blend_state { @@ -499,6 +498,8 @@ struct r300_context { struct r300_atom blend_color_state; /* Scissor state. */ struct r300_atom scissor_state; + /* Sample mask. */ + struct r300_atom sample_mask; /* Invariant state. This must be emitted to get the engine started. */ struct r300_atom invariant_state; /* Viewport state. */ @@ -573,6 +574,10 @@ struct r300_context { enum r300_fs_validity_status fs_status; /* Framebuffer multi-write. */ boolean fb_multiwrite; + unsigned num_samples; + boolean msaa_enable; + boolean alpha_to_one; + boolean alpha_to_coverage; void *dsa_decompress_zmask; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 1b9de40afaa..1700cbb4667 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -93,6 +93,13 @@ void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) } } + /* Setup alpha-to-coverage. */ + if (r300->alpha_to_coverage && r300->msaa_enable) { + /* Always set 3/6, it improves precision even for 2x and 4x MSAA. */ + alpha_func |= R300_FG_ALPHA_FUNC_MASK_ENABLE | + R300_FG_ALPHA_FUNC_CFG_3_OF_6; + } + OUT_CS_REG(R300_FG_ALPHA_FUNC, alpha_func); WRITE_CS_TABLE(fb->zsbuf ? 
&dsa->cb_begin : dsa->cb_zb_no_readwrite, size-2); } @@ -366,12 +373,16 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config); if (aa->dest) { - OUT_CS_REG(R300_RB3D_AARESOLVE_OFFSET, aa->dest->offset); + OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 3); + OUT_CS(aa->dest->offset); + OUT_CS(aa->dest->pitch & R300_RB3D_AARESOLVE_PITCH_MASK); + OUT_CS(R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | + R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE); OUT_CS_RELOC(aa->dest); - OUT_CS_REG(R300_RB3D_AARESOLVE_PITCH, aa->dest->pitch); + } else { + OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0); } - OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); END_CS; } @@ -475,12 +486,85 @@ void r300_emit_hyperz_end(struct r300_context *r300) r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); } +#define R300_NIBBLES(x0, y0, x1, y1, x2, y2, d0y, d0x) \ + (((x0) & 0xf) | (((y0) & 0xf) << 4) | \ + (((x1) & 0xf) << 8) | (((y1) & 0xf) << 12) | \ + (((x2) & 0xf) << 16) | (((y2) & 0xf) << 20) | \ + (((d0y) & 0xf) << 24) | (((d0x) & 0xf) << 28)) + +static unsigned r300_get_mspos(int index, unsigned *p) +{ + unsigned reg, i, distx, disty, dist; + + if (index == 0) { + /* MSPOS0 contains positions for samples 0,1,2 as (X,Y) pairs of nibbles, + * followed by a (Y,X) pair containing the minimum distance from the pixel + * edge: + * X0, Y0, X1, Y1, X2, Y2, D0_Y, D0_X + * + * There is a quirk when setting D0_X. The value represents the distance + * from the left edge of the pixel quad to the first sample in subpixels. + * All values less than eight should use the actual value, but "7" should + * be used for the distance "8". The hardware will convert 7 into 8 internally. 
+ */ + distx = 11; + for (i = 0; i < 12; i += 2) { + if (p[i] < distx) + distx = p[i]; + } + + disty = 11; + for (i = 1; i < 12; i += 2) { + if (p[i] < disty) + disty = p[i]; + } + + if (distx == 8) + distx = 7; + + reg = R300_NIBBLES(p[0], p[1], p[2], p[3], p[4], p[5], disty, distx); + } else { + /* MSPOS1 contains positions for samples 3,4,5 as (X,Y) pairs of nibbles, + * followed by the minimum distance from the pixel edge (not sure if X or Y): + * X3, Y3, X4, Y4, X5, Y5, D1 + */ + dist = 11; + for (i = 0; i < 12; i++) { + if (p[i] < dist) + dist = p[i]; + } + + reg = R300_NIBBLES(p[6], p[7], p[8], p[9], p[10], p[11], dist, 0); + } + return reg; +} + void r300_emit_fb_state_pipelined(struct r300_context *r300, unsigned size, void *state) { + /* The sample coordinates are in the range [0,11], because + * GB_TILE_CONFIG.SUBPIXEL is set to the 1/12 subpixel precision. + * + * Some sample coordinates reach to neighboring pixels and should not be used. + * (e.g. Y=11) + * + * The unused samples must be set to the positions of other valid samples. */ + static unsigned sample_locs_1x[12] = { + 6,6, 6,6, 6,6, 6,6, 6,6, 6,6 + }; + static unsigned sample_locs_2x[12] = { + 3,9, 9,3, 9,3, 9,3, 9,3, 9,3 + }; + static unsigned sample_locs_4x[12] = { + 4,4, 8,8, 2,10, 10,2, 10,2, 10,2 + }; + static unsigned sample_locs_6x[12] = { + 3,1, 7,3, 11,5, 1,7, 5,9, 9,10 + }; + struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - unsigned i, num_cbufs = fb->nr_cbufs; + unsigned i, num_samples, num_cbufs = fb->nr_cbufs; unsigned mspos0, mspos1; CS_LOCALS(r300); @@ -509,32 +593,28 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, /* Multisampling. Depends on framebuffer sample count. * These are pipelined regs and as such cannot be moved - * to the AA state. */ - mspos0 = 0x66666666; - mspos1 = 0x6666666; - - if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { - /* Subsample placement. These may not be optimal. 
*/ - switch (fb->cbufs[0]->texture->nr_samples) { - case 2: - mspos0 = 0x33996633; - mspos1 = 0x6666663; - break; - case 3: - mspos0 = 0x33936933; - mspos1 = 0x6666663; - break; - case 4: - mspos0 = 0x33939933; - mspos1 = 0x3966663; - break; - case 6: - mspos0 = 0x22a2aa22; - mspos1 = 0x2a65672; - break; - default: - debug_printf("r300: Bad number of multisamples!\n"); - } + * to the AA state. + */ + num_samples = r300->msaa_enable ? r300->num_samples : 1; + + /* Sample positions. */ + switch (num_samples) { + default: + mspos0 = r300_get_mspos(0, sample_locs_1x); + mspos1 = r300_get_mspos(1, sample_locs_1x); + break; + case 2: + mspos0 = r300_get_mspos(0, sample_locs_2x); + mspos1 = r300_get_mspos(1, sample_locs_2x); + break; + case 4: + mspos0 = r300_get_mspos(0, sample_locs_4x); + mspos1 = r300_get_mspos(1, sample_locs_4x); + break; + case 6: + mspos0 = r300_get_mspos(0, sample_locs_6x); + mspos1 = r300_get_mspos(1, sample_locs_6x); + break; } OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); @@ -751,6 +831,18 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } +void r300_emit_sample_mask(struct r300_context *r300, + unsigned size, void *state) +{ + unsigned mask = (*(unsigned*)state) & ((1 << 6)-1); + CS_LOCALS(r300); + + BEGIN_CS(size); + OUT_CS_REG(R300_SC_SCREENDOOR, + mask | (mask << 6) | (mask << 12) | (mask << 18)); + END_CS; +} + void r300_emit_scissor_state(struct r300_context* r300, unsigned size, void* state) { @@ -1176,6 +1268,7 @@ boolean r300_emit_buffer_validate(struct r300_context *r300, { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct r300_textures_state *texstate = (struct r300_textures_state*)r300->textures_state.state; struct r300_resource *tex; @@ -1201,6 +1294,14 @@ validate: r300_surface(fb->zsbuf)->domain); } } + /* The AA resolve buffer. 
*/ + if (r300->aa_state.dirty) { + if (aa->dest) { + r300->rws->cs_add_reloc(r300->cs, aa->dest->cs_buf, + RADEON_USAGE_WRITE, + aa->dest->domain); + } + } if (r300->textures_state.dirty) { /* ...textures... */ for (i = 0; i < texstate->count; i++) { @@ -1282,7 +1383,9 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) dwords += 26; /* emit_query_end */ dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ if (r300->screen->caps.is_r500) - dwords += 2; + dwords += 2; /* emit_index_bias */ + if (r300->screen->info.drm_minor >= 6) + dwords += 3; /* MSPOS */ return dwords; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 234e043b071..a58ab857f56 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -80,6 +80,9 @@ void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state); void r300_emit_rs_block_state(struct r300_context* r300, unsigned size, void* state); +void r300_emit_sample_mask(struct r300_context *r300, + unsigned size, void *state); + void r300_emit_scissor_state(struct r300_context* r300, unsigned size, void* state); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 365dc8c3c11..10c4a30f67f 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -43,6 +43,14 @@ static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags) if (r300->screen->caps.is_r500) r500_emit_index_bias(r300, 0); + /* The DDX doesn't set these regs. 
*/ + if (r300->screen->info.drm_minor >= 6) { + CS_LOCALS(r300); + OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); + OUT_CS(0x66666666); + OUT_CS(0x6666666); + } + r300->flush_counter++; r300->rws->cs_flush(r300->cs, flags); r300->dirty_hw = 0; diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 0842f9ad5dc..6e1b4e44ad3 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -149,6 +149,8 @@ static void get_external_state( struct r300_textures_state *texstate = r300->textures_state.state; unsigned i; + state->alpha_to_one = r300->alpha_to_one && r300->msaa_enable; + for (i = 0; i < texstate->sampler_state_count; i++) { struct r300_sampler_state *s = texstate->sampler_states[i]; struct r300_sampler_view *v = texstate->sampler_views[i]; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 683fc03d523..1e79970ed0d 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -1201,58 +1201,6 @@ done: r300->sprite_coord_enable = last_sprite_coord_enable; } -#if 0 -static void r300_resource_resolve(struct pipe_context *pipe, - const struct pipe_resolve_info *info) -{ - struct r300_context *r300 = r300_context(pipe); - struct pipe_surface *srcsurf, *dstsurf, surf_tmpl; - struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; - static const union pipe_color_union color; - - assert(0 && "Resource resolve is unsupported, invalid call."); - - memset(&surf_tmpl, 0, sizeof(surf_tmpl)); - surf_tmpl.format = info->src.res->format; - surf_tmpl.u.tex.first_layer = - surf_tmpl.u.tex.last_layer = info->src.layer; - srcsurf = pipe->create_surface(pipe, info->src.res, &surf_tmpl); - /* XXX Offset both surfaces by x0,y1. 
*/ - - surf_tmpl.format = info->dst.res->format; - surf_tmpl.u.tex.level = info->dst.level; - surf_tmpl.u.tex.first_layer = - surf_tmpl.u.tex.last_layer = info->dst.layer; - dstsurf = pipe->create_surface(pipe, info->dst.res, &surf_tmpl); - - DBG(r300, DBG_DRAW, "r300: Resolving resource...\n"); - - /* Enable AA resolve. */ - aa->dest = r300_surface(dstsurf); - aa->aaresolve_ctl = - R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | - R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; - r300->aa_state.size = 10; - r300_mark_atom_dirty(r300, &r300->aa_state); - - /* Resolve the surface. */ - /* XXX: y1 < 0 ==> Y flip */ - r300->context.clear_render_target(pipe, - srcsurf, &color, 0, 0, - info->dst.x1 - info->dst.x0, - info->dst.y1 - info->dst.y0); - - /* Disable AA resolve. */ - aa->dest = NULL; - aa->aaresolve_ctl = 0; - r300->aa_state.size = 4; - r300_mark_atom_dirty(r300, &r300->aa_state); - - pipe_surface_reference(&srcsurf, NULL); - pipe_surface_reference(&dstsurf, NULL); -} -#endif - void r300_init_render_functions(struct r300_context *r300) { /* Set draw functions based on presence of HW TCL. */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index db8f171786d..762f6072e0c 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -392,17 +392,34 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, case 1: break; case 2: - case 3: case 4: case 6: - return FALSE; -#if 0 - if (usage != PIPE_BIND_RENDER_TARGET || + /* We need DRM 2.8.0. */ + if (!drm_2_8_0) { + return FALSE; + } + /* Only support R500, because I didn't test older chipsets, + * but MSAA should work there too. */ + if (!is_r500 && !debug_get_bool_option("RADEON_MSAA", FALSE)) { + return FALSE; + } + /* No texturing and scanout. 
*/ + if (usage & (PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT)) { + return FALSE; + } + /* Only allow depth/stencil, RGBA8, RGBA16F */ + if (!util_format_is_depth_or_stencil(format) && !util_format_is_rgba8_variant( - util_format_description(format))) { + util_format_description(format)) && + format != PIPE_FORMAT_R16G16B16A16_FLOAT) { + return FALSE; + } + /* RGBA16F AA is only supported on R500. */ + if (format == PIPE_FORMAT_R16G16B16A16_FLOAT && !is_r500) { return FALSE; } -#endif break; default: return FALSE; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index a5f96831081..1e7cff91f9a 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -440,8 +440,27 @@ static void r300_bind_blend_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_blend_state *blend = (struct r300_blend_state*)state; + boolean last_alpha_to_one = r300->alpha_to_one; + boolean last_alpha_to_coverage = r300->alpha_to_coverage; UPDATE_STATE(state, r300->blend_state); + + if (!blend) + return; + + r300->alpha_to_one = blend->state.alpha_to_one; + r300->alpha_to_coverage = blend->state.alpha_to_coverage; + + if (r300->alpha_to_one != last_alpha_to_one && r300->msaa_enable && + r300->fs_status == FRAGMENT_SHADER_VALID) { + r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY; + } + + if (r300->alpha_to_coverage != last_alpha_to_coverage && + r300->msaa_enable) { + r300_mark_atom_dirty(r300, &r300->dsa_state); + } } /* Free blend state. */ @@ -553,13 +572,6 @@ static void r300_set_clip_state(struct pipe_context* pipe, } } -static void -r300_set_sample_mask(struct pipe_context *pipe, - unsigned sample_mask) -{ -} - - /* Create a new depth, stencil, and alpha state based on the CSO dsa state. * * This contains the depth buffer, stencil buffer, alpha test, and such. 
@@ -816,6 +828,25 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, /* The size of the rest of atoms stays the same. */ } +static unsigned r300_get_num_samples(struct r300_context *r300) +{ + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + unsigned num_samples; + + if (fb->nr_cbufs) + num_samples = fb->cbufs[0]->texture->nr_samples; + else if (fb->zsbuf) + num_samples = fb->zsbuf->texture->nr_samples; + else + num_samples = 1; + + if (!num_samples) + num_samples = 1; + + return num_samples; +} + static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) @@ -911,22 +942,22 @@ r300_set_framebuffer_state(struct pipe_context* pipe, } } - /* Set up AA config. */ - if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { - aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; + r300->num_samples = r300_get_num_samples(r300); - switch (state->cbufs[0]->texture->nr_samples) { + /* Set up AA config. 
*/ + if (r300->num_samples > 1) { + switch (r300->num_samples) { case 2: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; - break; - case 3: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; + aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE | + R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; break; case 4: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; + aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE | + R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; break; case 6: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; + aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE | + R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; break; } } else { @@ -1251,6 +1282,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) struct r300_rs_state* rs = (struct r300_rs_state*)state; int last_sprite_coord_enable = r300->sprite_coord_enable; boolean last_two_sided_color = r300->two_sided_color; + boolean last_msaa_enable = r300->msaa_enable; if (r300->draw && rs) { draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state); @@ -1260,10 +1292,12 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) r300->polygon_offset_enabled = rs->polygon_offset_enable; r300->sprite_coord_enable = rs->rs.sprite_coord_enable; r300->two_sided_color = rs->rs.light_twoside; + r300->msaa_enable = rs->rs.multisample; } else { r300->polygon_offset_enabled = FALSE; r300->sprite_coord_enable = 0; r300->two_sided_color = FALSE; + r300->msaa_enable = FALSE; } UPDATE_STATE(state, r300->rs_state); @@ -1273,6 +1307,19 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) last_two_sided_color != r300->two_sided_color) { r300_mark_atom_dirty(r300, &r300->rs_block_state); } + + if (last_msaa_enable != r300->msaa_enable) { + r300_mark_atom_dirty(r300, &r300->fb_state_pipelined); + + if (r300->alpha_to_coverage) { + r300_mark_atom_dirty(r300, &r300->dsa_state); + } + + if (r300->alpha_to_one && + r300->fs_status == FRAGMENT_SHADER_VALID) { + r300->fs_status = 
FRAGMENT_SHADER_MAYBE_DIRTY; + } + } } /* Free rasterizer state. */ @@ -1542,6 +1589,16 @@ r300_sampler_view_destroy(struct pipe_context *pipe, FREE(view); } +static void r300_set_sample_mask(struct pipe_context *pipe, + unsigned mask) +{ + struct r300_context* r300 = r300_context(pipe); + + *((unsigned*)r300->sample_mask.state) = mask; + + r300_mark_atom_dirty(r300, &r300->sample_mask); +} + static void r300_set_scissor_state(struct pipe_context* pipe, const struct pipe_scissor_state* state) { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 568558f0adb..e18dcf8e1cc 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -955,10 +955,6 @@ r300_texture_create_object(struct r300_screen *rscreen, struct radeon_winsys *rws = rscreen->rws; struct r300_resource *tex = NULL; - if (base->nr_samples > 1) { - goto fail; - } - tex = CALLOC_STRUCT(r300_resource); if (!tex) { goto fail; diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 04d439bcc1f..9493eb19c28 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -56,7 +56,6 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, } }; - static const unsigned aa_block[2] = {4, 8}; unsigned tile = 0; unsigned pixsize = util_format_get_blocksize(format); @@ -65,22 +64,14 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, assert(pixsize <= 16); assert(dim <= DIM_HEIGHT); - if (num_samples > 1) { - /* Multisampled textures have their own alignment scheme. */ - if (pixsize == 4) - tile = aa_block[dim]; - /* XXX FP16 AA. */ - } else { - /* Standard alignment. 
*/ - tile = table[macrotile][util_logbase2(pixsize)][microtile][dim]; - if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) { - int align; - int h_tile; - h_tile = table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT]; - align = 64 / (pixsize * h_tile); - if (tile < align) - tile = align; - } + tile = table[macrotile][util_logbase2(pixsize)][microtile][dim]; + if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) { + int align; + int h_tile; + h_tile = table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT]; + align = 64 / (pixsize * h_tile); + if (tile < align) + tile = align; } assert(tile); @@ -95,6 +86,10 @@ static boolean r300_texture_macro_switch(struct r300_resource *tex, { unsigned tile, texdim; + if (tex->b.b.nr_samples > 1) { + return TRUE; + } + tile = r300_get_pixel_alignment(tex->b.b.format, tex->b.b.nr_samples, tex->tex.microtile, RADEON_LAYOUT_TILED, dim, 0); if (dim == DIM_WIDTH) { @@ -248,7 +243,7 @@ static void r300_setup_miptree(struct r300_screen *screen, layer_size = stride * nblocksy; - if (base->nr_samples) { + if (base->nr_samples > 1) { layer_size *= base->nr_samples; } @@ -423,6 +418,12 @@ static void r300_setup_tiling(struct r300_screen *screen, boolean is_zb = util_format_is_depth_or_stencil(format); boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); + if (tex->b.b.nr_samples > 1) { + tex->tex.microtile = RADEON_LAYOUT_TILED; + tex->tex.macrotile[0] = RADEON_LAYOUT_TILED; + return; + } + tex->tex.microtile = RADEON_LAYOUT_LINEAR; tex->tex.macrotile[0] = RADEON_LAYOUT_LINEAR; diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 6ad08c6740e..436b30445b2 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -52,11 +52,31 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, struct r300_transfer *r300transfer) { struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer; - struct 
pipe_resource *tex = transfer->resource; + struct pipe_resource *src = transfer->resource; + struct pipe_resource *dst = &r300transfer->linear_texture->b.b; - ctx->resource_copy_region(ctx, &r300transfer->linear_texture->b.b, 0, - 0, 0, 0, - tex, transfer->level, &transfer->box); + if (src->nr_samples <= 1) { + ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0, + src, transfer->level, &transfer->box); + } else { + /* Resolve the resource. */ + struct pipe_blit_info blit; + + memset(&blit, 0, sizeof(blit)); + blit.src.resource = src; + blit.src.format = src->format; + blit.src.level = transfer->level; + blit.src.box = transfer->box; + blit.dst.resource = dst; + blit.dst.format = dst->format; + blit.dst.box.width = transfer->box.width; + blit.dst.box.height = transfer->box.height; + blit.dst.box.depth = transfer->box.depth; + blit.mask = PIPE_MASK_RGBA; + blit.filter = PIPE_TEX_FILTER_NEAREST; + + ctx->blit(ctx, &blit); + } } /* Copy a detiled texture to a tiled one. */