r300g: implement MSAA
author Marek Olšák <maraeo@gmail.com>
Sat, 5 Jan 2013 05:21:49 +0000 (06:21 +0100)
committer Marek Olšák <maraeo@gmail.com>
Sun, 6 Jan 2013 13:44:12 +0000 (14:44 +0100)
This is not as optimized as r600g - the MSAA compression is missing,
so r300g needs a lot of bandwidth (more than r600g to do the same thing).
However, if the bandwidth is not an issue for you, you can enjoy this
unoptimized MSAA support.
The only other missing optimization for MSAA is the fast color clear.

MSAA is enabled on r500 only, because that's the only GPU family I tested.
That said, MSAA should work on r300 and r400 as well (but you must set
RADEON_MSAA=1 to allow it, then turn MSAA on in your app or set GALLIUM_MSAA=n,
2 <= n <= 6).
I will enable the support by default on r300-r400 once someone (other than me)
tests those chipsets with piglit.

The supported modes are 2x, 4x, 6x.

The supported MSAA formats are RGBA8, BGRA8, and RGBA16F (r500 only).
Those 3 formats are used for all GL internal formats.

Tested with piglit. (I have ported all MSAA tests to GL2.1)

14 files changed:
src/gallium/auxiliary/util/u_blitter.c
src/gallium/drivers/r300/r300_blit.c
src/gallium/drivers/r300/r300_context.c
src/gallium/drivers/r300/r300_context.h
src/gallium/drivers/r300/r300_emit.c
src/gallium/drivers/r300/r300_emit.h
src/gallium/drivers/r300/r300_flush.c
src/gallium/drivers/r300/r300_fs.c
src/gallium/drivers/r300/r300_render.c
src/gallium/drivers/r300/r300_screen.c
src/gallium/drivers/r300/r300_state.c
src/gallium/drivers/r300/r300_texture.c
src/gallium/drivers/r300/r300_texture_desc.c
src/gallium/drivers/r300/r300_transfer.c

index 1b9a3f438ac31ef85a2d38cf1182f6c007aae287..95224020c794f0112c97cab1f2baf1bbe6053775 100644 (file)
@@ -1816,7 +1816,8 @@ void util_blitter_custom_color(struct blitter_context *blitter,
    blitter_disable_render_cond(ctx);
 
    /* bind states */
-   pipe->bind_blend_state(pipe, custom_blend);
+   pipe->bind_blend_state(pipe, custom_blend ? custom_blend
+                                             : ctx->blend[PIPE_MASK_RGBA]);
    pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
    ctx->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1, FALSE));
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
index 9fff3700ed3f5dbc369e91d7e6744f2b4f1a0ca4..46578318af2c51bd1f677a69b9069a4db17e41f0 100644 (file)
@@ -23,6 +23,7 @@
 #include "r300_context.h"
 #include "r300_emit.h"
 #include "r300_texture.h"
+#include "r300_reg.h"
 
 #include "util/u_format.h"
 #include "util/u_pack_color.h"
@@ -66,6 +67,7 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o
     util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state);
     util_blitter_save_viewport(r300->blitter, &r300->viewport);
     util_blitter_save_scissor(r300->blitter, r300->scissor_state.state);
+    util_blitter_save_sample_mask(r300->blitter, *(unsigned*)r300->sample_mask.state);
     util_blitter_save_vertex_buffer_slot(r300->blitter, r300->vertex_buffer);
     util_blitter_save_vertex_elements(r300->blitter, r300->velems);
 
@@ -478,6 +480,11 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
         return;
     }
 
+    /* Can't read MSAA textures. */
+    if (src->nr_samples > 1 || dst->nr_samples > 1) {
+        return;
+    }
+
     /* The code below changes the texture format so that the copy can be done
      * on hardware. E.g. depth-stencil surfaces are copied as RGBA
      * colorbuffers. */
@@ -595,20 +602,141 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
     pipe_sampler_view_reference(&src_view, NULL);
 }
 
+static boolean r300_is_simple_msaa_resolve(const struct pipe_blit_info *info)
+{
+    unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
+    unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level);
+
+    return info->dst.resource->format == info->src.resource->format &&
+           info->dst.resource->format == info->dst.format &&
+           info->src.resource->format == info->src.format &&
+           !info->scissor_enable &&
+           info->mask == PIPE_MASK_RGBA &&
+           dst_width == info->src.resource->width0 &&
+           dst_height == info->src.resource->height0 &&
+           info->dst.box.x == 0 &&
+           info->dst.box.y == 0 &&
+           info->dst.box.width == dst_width &&
+           info->dst.box.height == dst_height &&
+           info->src.box.x == 0 &&
+           info->src.box.y == 0 &&
+           info->src.box.width == dst_width &&
+           info->src.box.height == dst_height;
+}
+
+static void r300_simple_msaa_resolve(struct pipe_context *pipe,
+                                     struct pipe_resource *dst,
+                                     unsigned dst_level,
+                                     unsigned dst_layer,
+                                     struct pipe_resource *src,
+                                     enum pipe_format format)
+{
+    struct r300_context *r300 = r300_context(pipe);
+    struct r300_surface *srcsurf, *dstsurf;
+    struct pipe_surface surf_tmpl;
+    struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
+
+    memset(&surf_tmpl, 0, sizeof(surf_tmpl));
+    surf_tmpl.format = format;
+    srcsurf = r300_surface(pipe->create_surface(pipe, src, &surf_tmpl));
+
+    surf_tmpl.format = format;
+    surf_tmpl.u.tex.level = dst_level;
+    surf_tmpl.u.tex.first_layer =
+    surf_tmpl.u.tex.last_layer = dst_layer;
+    dstsurf = r300_surface(pipe->create_surface(pipe, dst, &surf_tmpl));
+
+    /* COLORPITCH should contain the tiling info of the resolve buffer.
+     * The tiling of the AA buffer isn't programmable anyway. */
+    srcsurf->pitch &= ~(R300_COLOR_TILE(1) | R300_COLOR_MICROTILE(3));
+    srcsurf->pitch |= dstsurf->pitch & (R300_COLOR_TILE(1) | R300_COLOR_MICROTILE(3));
+
+    /* Enable AA resolve. */
+    aa->dest = dstsurf;
+    r300->aa_state.size = 8;
+    r300_mark_atom_dirty(r300, &r300->aa_state);
+
+    /* Resolve the surface. */
+    r300_blitter_begin(r300, R300_CLEAR_SURFACE);
+    util_blitter_custom_color(r300->blitter, &srcsurf->base, NULL);
+    r300_blitter_end(r300);
+
+    /* Disable AA resolve. */
+    aa->dest = NULL;
+    r300->aa_state.size = 4;
+    r300_mark_atom_dirty(r300, &r300->aa_state);
+
+    pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL);
+    pipe_surface_reference((struct pipe_surface**)&dstsurf, NULL);
+}
+
+static void r300_msaa_resolve(struct pipe_context *pipe,
+                              const struct pipe_blit_info *info)
+{
+    struct r300_context *r300 = r300_context(pipe);
+    struct pipe_screen *screen = pipe->screen;
+    struct pipe_resource *tmp, templ;
+    struct pipe_blit_info blit;
+
+    assert(info->src.level == 0);
+    assert(info->src.box.z == 0);
+    assert(info->src.box.depth == 1);
+    assert(info->dst.box.depth == 1);
+
+    if (r300_is_simple_msaa_resolve(info)) {
+        r300_simple_msaa_resolve(pipe, info->dst.resource, info->dst.level,
+                                 info->dst.box.z, info->src.resource,
+                                 info->src.format);
+        return;
+    }
+
+    /* resolve into a temporary texture, then blit */
+    memset(&templ, 0, sizeof(templ));
+    templ.target = PIPE_TEXTURE_2D;
+    templ.format = info->src.resource->format;
+    templ.width0 = info->src.resource->width0;
+    templ.height0 = info->src.resource->height0;
+    templ.depth0 = 1;
+    templ.array_size = 1;
+    templ.usage = PIPE_USAGE_STATIC;
+
+    tmp = screen->resource_create(screen, &templ);
+
+    /* resolve */
+    r300_simple_msaa_resolve(pipe, tmp, 0, 0, info->src.resource,
+                             info->src.format);
+
+    /* blit */
+    blit = *info;
+    blit.src.resource = tmp;
+    blit.src.box.z = 0;
+
+    r300_blitter_begin(r300, R300_BLIT);
+    util_blitter_blit(r300->blitter, &blit);
+    r300_blitter_end(r300);
+
+    pipe_resource_reference(&tmp, NULL);
+}
+
 static void r300_blit(struct pipe_context *pipe,
-                      const struct pipe_blit_info *blit_info)
+                      const struct pipe_blit_info *blit)
 {
     struct r300_context *r300 = r300_context(pipe);
     struct pipe_framebuffer_state *fb =
         (struct pipe_framebuffer_state*)r300->fb_state.state;
-    struct pipe_blit_info info = *blit_info;
+    struct pipe_blit_info info = *blit;
 
-    /* Decompress ZMASK. */
-    if (r300->zmask_in_use && !r300->locked_zbuffer) {
-        if (fb->zsbuf->texture == info.src.resource ||
-            fb->zsbuf->texture == info.dst.resource) {
-            r300_decompress_zmask(r300);
-        }
+    /* MSAA resolve. */
+    if (info.src.resource->nr_samples > 1 &&
+        info.dst.resource->nr_samples <= 1 &&
+        !util_format_is_depth_or_stencil(info.src.resource->format)) {
+        r300_msaa_resolve(pipe, &info);
+        return;
+    }
+
+    /* Can't read MSAA textures. */
+    if (info.src.resource->nr_samples > 1) {
+        return;
     }
 
     /* Blit a combined depth-stencil resource as color.
@@ -616,12 +744,29 @@ static void r300_blit(struct pipe_context *pipe,
     if ((info.mask & PIPE_MASK_S) &&
         info.src.format == PIPE_FORMAT_S8_UINT_Z24_UNORM &&
         info.dst.format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
-        info.src.format = PIPE_FORMAT_B8G8R8A8_UNORM;
-        info.dst.format = PIPE_FORMAT_B8G8R8A8_UNORM;
-        if (info.mask & PIPE_MASK_Z) {
-            info.mask = PIPE_MASK_RGBA; /* depth+stencil */
+        if (info.dst.resource->nr_samples > 1) {
+            /* Cannot do that with MSAA buffers. */
+            info.mask &= ~PIPE_MASK_S;
+            if (!(info.mask & PIPE_MASK_Z)) {
+                return;
+            }
         } else {
-            info.mask = PIPE_MASK_B; /* stencil only */
+            /* Single-sample buffer. */
+            info.src.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+            info.dst.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+            if (info.mask & PIPE_MASK_Z) {
+                info.mask = PIPE_MASK_RGBA; /* depth+stencil */
+            } else {
+                info.mask = PIPE_MASK_B; /* stencil only */
+            }
+        }
+    }
+
+    /* Decompress ZMASK. */
+    if (r300->zmask_in_use && !r300->locked_zbuffer) {
+        if (fb->zsbuf->texture == info.src.resource ||
+            fb->zsbuf->texture == info.dst.resource) {
+            r300_decompress_zmask(r300);
         }
     }
 
index 8178c394d07c27e74ef9db323f8e5ef75a263324..b498454561d868bb718a6a36b327a16c4a64b4b3 100644 (file)
@@ -105,6 +105,7 @@ static void r300_destroy_context(struct pipe_context* context)
         FREE(r300->hyperz_state.state);
         FREE(r300->invariant_state.state);
         FREE(r300->rs_block_state.state);
+        FREE(r300->sample_mask.state);
         FREE(r300->scissor_state.state);
         FREE(r300->textures_state.state);
         FREE(r300->vap_invariant_state.state);
@@ -175,9 +176,10 @@ static boolean r300_setup_atoms(struct r300_context* r300)
     R300_INIT_ATOM(blend_state, 8);
     R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2);
     /* SC. */
+    R300_INIT_ATOM(sample_mask, 2);
     R300_INIT_ATOM(scissor_state, 3);
     /* GB, FG, GA, SU, SC, RB3D. */
-    R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0));
+    R300_INIT_ATOM(invariant_state, 14 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0));
     /* VAP. */
     R300_INIT_ATOM(viewport_state, 9);
     R300_INIT_ATOM(pvs_flush, 2);
@@ -224,6 +226,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
     R300_ALLOC_ATOM(ztop_state, r300_ztop_state);
     R300_ALLOC_ATOM(fb_state, pipe_framebuffer_state);
     R300_ALLOC_ATOM(gpu_flush, pipe_framebuffer_state);
+    r300->sample_mask.state = malloc(4);
     R300_ALLOC_ATOM(scissor_state, pipe_scissor_state);
     R300_ALLOC_ATOM(rs_block_state, r300_rs_block);
     R300_ALLOC_ATOM(fs_constants, r300_constant_buffer);
@@ -270,6 +273,7 @@ static void r300_init_states(struct pipe_context *pipe)
     pipe->set_blend_color(pipe, &bc);
     pipe->set_clip_state(pipe, &cs);
     pipe->set_scissor_state(pipe, &ss);
+    pipe->set_sample_mask(pipe, ~0);
 
     /* Initialize the GPU flush. */
     {
@@ -317,7 +321,6 @@ static void r300_init_states(struct pipe_context *pipe)
         OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
         OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0);
         OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525);
-        OUT_CB_REG(R300_SC_SCREENDOOR, 0xffffff);
 
         if (r300->screen->caps.is_rv350) {
             OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
index cddd91d7ab80c1237299d82ffadc69d3e6bc2c0a..1b912c3eeeeb8513bc4604e0edde65ca2a57e4a6 100644 (file)
@@ -73,7 +73,6 @@ struct r300_aa_state {
     struct r300_surface *dest;
 
     uint32_t aa_config;
-    uint32_t aaresolve_ctl;
 };
 
 struct r300_blend_state {
@@ -499,6 +498,8 @@ struct r300_context {
     struct r300_atom blend_color_state;
     /* Scissor state. */
     struct r300_atom scissor_state;
+    /* Sample mask. */
+    struct r300_atom sample_mask;
     /* Invariant state. This must be emitted to get the engine started. */
     struct r300_atom invariant_state;
     /* Viewport state. */
@@ -573,6 +574,10 @@ struct r300_context {
     enum r300_fs_validity_status fs_status;
     /* Framebuffer multi-write. */
     boolean fb_multiwrite;
+    unsigned num_samples;
+    boolean msaa_enable;
+    boolean alpha_to_one;
+    boolean alpha_to_coverage;
 
     void *dsa_decompress_zmask;
 
index 1b9de40afaad197e4c74b7145627af6bb910adca..1700cbb4667447f2ca664515bf6d35c1570557ec 100644 (file)
@@ -93,6 +93,13 @@ void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state)
         }
     }
 
+    /* Setup alpha-to-coverage. */
+    if (r300->alpha_to_coverage && r300->msaa_enable) {
+        /* Always set 3/6, it improves precision even for 2x and 4x MSAA. */
+        alpha_func |= R300_FG_ALPHA_FUNC_MASK_ENABLE |
+                      R300_FG_ALPHA_FUNC_CFG_3_OF_6;
+    }
+
     OUT_CS_REG(R300_FG_ALPHA_FUNC, alpha_func);
     WRITE_CS_TABLE(fb->zsbuf ? &dsa->cb_begin : dsa->cb_zb_no_readwrite, size-2);
 }
@@ -366,12 +373,16 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state)
     OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config);
 
     if (aa->dest) {
-        OUT_CS_REG(R300_RB3D_AARESOLVE_OFFSET, aa->dest->offset);
+        OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 3);
+        OUT_CS(aa->dest->offset);
+        OUT_CS(aa->dest->pitch & R300_RB3D_AARESOLVE_PITCH_MASK);
+        OUT_CS(R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE |
+               R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE);
         OUT_CS_RELOC(aa->dest);
-        OUT_CS_REG(R300_RB3D_AARESOLVE_PITCH, aa->dest->pitch);
+    } else {
+        OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0);
     }
 
-    OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl);
     END_CS;
 }
 
@@ -475,12 +486,85 @@ void r300_emit_hyperz_end(struct r300_context *r300)
     r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z);
 }
 
+#define R300_NIBBLES(x0, y0, x1, y1, x2, y2, d0y, d0x)  \
+    (((x0) & 0xf) | (((y0) & 0xf) << 4) |                 \
+    (((x1) & 0xf) << 8) | (((y1) & 0xf) << 12) |          \
+    (((x2) & 0xf) << 16) | (((y2) & 0xf) << 20) |         \
+    (((d0y) & 0xf) << 24) | (((d0x) & 0xf) << 28))
+
+static unsigned r300_get_mspos(int index, unsigned *p)
+{
+    unsigned reg, i, distx, disty, dist;
+
+    if (index == 0) {
+        /* MSPOS0 contains positions for samples 0,1,2 as (X,Y) pairs of nibbles,
+         * followed by a (Y,X) pair containing the minimum distance from the pixel
+         * edge:
+         *     X0, Y0, X1, Y1, X2, Y2, D0_Y, D0_X
+         *
+         * There is a quirk when setting D0_X. The value represents the distance
+         * from the left edge of the pixel quad to the first sample in subpixels.
+         * All values less than eight should use the actual value, but „7‟ should
+         * be used for the distance „8‟. The hardware will convert 7 into 8 internally.
+         */
+        distx = 11;
+        for (i = 0; i < 12; i += 2) {
+            if (p[i] < distx)
+                distx = p[i];
+        }
+
+        disty = 11;
+        for (i = 1; i < 12; i += 2) {
+            if (p[i] < disty)
+                disty = p[i];
+        }
+
+        if (distx == 8)
+            distx = 7;
+
+        reg = R300_NIBBLES(p[0], p[1], p[2], p[3], p[4], p[5], disty, distx);
+    } else {
+        /* MSPOS1 contains positions for samples 3,4,5 as (X,Y) pairs of nibbles,
+         * followed by the minimum distance from the pixel edge (not sure if X or Y):
+         *     X3, Y3, X4, Y4, X5, Y5, D1
+         */
+        dist = 11;
+        for (i = 0; i < 12; i++) {
+            if (p[i] < dist)
+                dist = p[i];
+        }
+
+        reg = R300_NIBBLES(p[6], p[7], p[8], p[9], p[10], p[11], dist, 0);
+    }
+    return reg;
+}
+
 void r300_emit_fb_state_pipelined(struct r300_context *r300,
                                   unsigned size, void *state)
 {
+    /* The sample coordinates are in the range [0,11], because
+     * GB_TILE_CONFIG.SUBPIXEL is set to the 1/12 subpixel precision.
+     *
+     * Some sample coordinates reach to neighboring pixels and should not be used.
+     * (e.g. Y=11)
+     *
+     * The unused samples must be set to the positions of other valid samples. */
+    static unsigned sample_locs_1x[12] = {
+        6,6,  6,6,  6,6,  6,6,  6,6,  6,6
+    };
+    static unsigned sample_locs_2x[12] = {
+        3,9,  9,3,  9,3,  9,3,  9,3,  9,3
+    };
+    static unsigned sample_locs_4x[12] = {
+        4,4,  8,8,  2,10,  10,2,  10,2,  10,2
+    };
+    static unsigned sample_locs_6x[12] = {
+        3,1,  7,3,  11,5,  1,7,  5,9,  9,10
+    };
+
     struct pipe_framebuffer_state* fb =
             (struct pipe_framebuffer_state*)r300->fb_state.state;
-    unsigned i, num_cbufs = fb->nr_cbufs;
+    unsigned i, num_samples, num_cbufs = fb->nr_cbufs;
     unsigned mspos0, mspos1;
     CS_LOCALS(r300);
 
@@ -509,32 +593,28 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300,
 
     /* Multisampling. Depends on framebuffer sample count.
      * These are pipelined regs and as such cannot be moved
-     * to the AA state. */
-    mspos0 = 0x66666666;
-    mspos1 = 0x6666666;
-
-    if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) {
-        /* Subsample placement. These may not be optimal. */
-        switch (fb->cbufs[0]->texture->nr_samples) {
-        case 2:
-            mspos0 = 0x33996633;
-            mspos1 = 0x6666663;
-            break;
-        case 3:
-            mspos0 = 0x33936933;
-            mspos1 = 0x6666663;
-            break;
-        case 4:
-            mspos0 = 0x33939933;
-            mspos1 = 0x3966663;
-            break;
-        case 6:
-            mspos0 = 0x22a2aa22;
-            mspos1 = 0x2a65672;
-            break;
-        default:
-            debug_printf("r300: Bad number of multisamples!\n");
-        }
+     * to the AA state.
+     */
+    num_samples = r300->msaa_enable ? r300->num_samples : 1;
+
+    /* Sample positions. */
+    switch (num_samples) {
+    default:
+        mspos0 = r300_get_mspos(0, sample_locs_1x);
+        mspos1 = r300_get_mspos(1, sample_locs_1x);
+        break;
+    case 2:
+        mspos0 = r300_get_mspos(0, sample_locs_2x);
+        mspos1 = r300_get_mspos(1, sample_locs_2x);
+        break;
+    case 4:
+        mspos0 = r300_get_mspos(0, sample_locs_4x);
+        mspos1 = r300_get_mspos(1, sample_locs_4x);
+        break;
+    case 6:
+        mspos0 = r300_get_mspos(0, sample_locs_6x);
+        mspos1 = r300_get_mspos(1, sample_locs_6x);
+        break;
     }
 
     OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
@@ -751,6 +831,18 @@ void r300_emit_rs_block_state(struct r300_context* r300,
     END_CS;
 }
 
+void r300_emit_sample_mask(struct r300_context *r300,
+                           unsigned size, void *state)
+{
+    unsigned mask = (*(unsigned*)state) & ((1 << 6)-1);
+    CS_LOCALS(r300);
+
+    BEGIN_CS(size);
+    OUT_CS_REG(R300_SC_SCREENDOOR,
+               mask | (mask << 6) | (mask << 12) | (mask << 18));
+    END_CS;
+}
+
 void r300_emit_scissor_state(struct r300_context* r300,
                              unsigned size, void* state)
 {
@@ -1176,6 +1268,7 @@ boolean r300_emit_buffer_validate(struct r300_context *r300,
 {
     struct pipe_framebuffer_state *fb =
         (struct pipe_framebuffer_state*)r300->fb_state.state;
+    struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
     struct r300_textures_state *texstate =
         (struct r300_textures_state*)r300->textures_state.state;
     struct r300_resource *tex;
@@ -1201,6 +1294,14 @@ validate:
                                     r300_surface(fb->zsbuf)->domain);
         }
     }
+    /* The AA resolve buffer. */
+    if (r300->aa_state.dirty) {
+        if (aa->dest) {
+            r300->rws->cs_add_reloc(r300->cs, aa->dest->cs_buf,
+                                    RADEON_USAGE_WRITE,
+                                    aa->dest->domain);
+        }
+    }
     if (r300->textures_state.dirty) {
         /* ...textures... */
         for (i = 0; i < texstate->count; i++) {
@@ -1282,7 +1383,9 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300)
     dwords += 26; /* emit_query_end */
     dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */
     if (r300->screen->caps.is_r500)
-        dwords += 2;
+        dwords += 2; /* emit_index_bias */
+    if (r300->screen->info.drm_minor >= 6)
+        dwords += 3; /* MSPOS */
 
     return dwords;
 }
index 234e043b0712fad3be43ca5717afdb7f1048feab..a58ab857f563c255c922ef53239b3806db7ac8f1 100644 (file)
@@ -80,6 +80,9 @@ void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state);
 void r300_emit_rs_block_state(struct r300_context* r300,
                               unsigned size, void* state);
 
+void r300_emit_sample_mask(struct r300_context *r300,
+                           unsigned size, void *state);
+
 void r300_emit_scissor_state(struct r300_context* r300,
                              unsigned size, void* state);
 
index 365dc8c3c11ff53723b4ce1a43a153494ed300b0..10c4a30f67fe7247330b0abd01398cd2adf00eea 100644 (file)
@@ -43,6 +43,14 @@ static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags)
     if (r300->screen->caps.is_r500)
         r500_emit_index_bias(r300, 0);
 
+    /* The DDX doesn't set these regs. */
+    if (r300->screen->info.drm_minor >= 6) {
+        CS_LOCALS(r300);
+        OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
+        OUT_CS(0x66666666);
+        OUT_CS(0x6666666);
+    }
+
     r300->flush_counter++;
     r300->rws->cs_flush(r300->cs, flags);
     r300->dirty_hw = 0;
index 0842f9ad5dc741662d175f88df21a41354bd8a92..6e1b4e44ad3ef3e17736b1c29af2970d32b54854 100644 (file)
@@ -149,6 +149,8 @@ static void get_external_state(
     struct r300_textures_state *texstate = r300->textures_state.state;
     unsigned i;
 
+    state->alpha_to_one = r300->alpha_to_one && r300->msaa_enable;
+
     for (i = 0; i < texstate->sampler_state_count; i++) {
         struct r300_sampler_state *s = texstate->sampler_states[i];
         struct r300_sampler_view *v = texstate->sampler_views[i];
index 683fc03d523882a4439c188d2d7cac330053c025..1e79970ed0dc5487e3f597d4f665ef32440acce7 100644 (file)
@@ -1201,58 +1201,6 @@ done:
     r300->sprite_coord_enable = last_sprite_coord_enable;
 }
 
-#if 0
-static void r300_resource_resolve(struct pipe_context *pipe,
-                                  const struct pipe_resolve_info *info)
-{
-    struct r300_context *r300 = r300_context(pipe);
-    struct pipe_surface *srcsurf, *dstsurf, surf_tmpl;
-    struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
-    static const union pipe_color_union color;
-
-    assert(0 && "Resource resolve is unsupported, invalid call.");
-
-    memset(&surf_tmpl, 0, sizeof(surf_tmpl));
-    surf_tmpl.format = info->src.res->format;
-    surf_tmpl.u.tex.first_layer =
-    surf_tmpl.u.tex.last_layer = info->src.layer;
-    srcsurf = pipe->create_surface(pipe, info->src.res, &surf_tmpl);
-    /* XXX Offset both surfaces by x0,y1. */
-
-    surf_tmpl.format = info->dst.res->format;
-    surf_tmpl.u.tex.level = info->dst.level;
-    surf_tmpl.u.tex.first_layer =
-    surf_tmpl.u.tex.last_layer = info->dst.layer;
-    dstsurf = pipe->create_surface(pipe, info->dst.res, &surf_tmpl);
-
-    DBG(r300, DBG_DRAW, "r300: Resolving resource...\n");
-
-    /* Enable AA resolve. */
-    aa->dest = r300_surface(dstsurf);
-    aa->aaresolve_ctl =
-        R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE |
-        R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE;
-    r300->aa_state.size = 10;
-    r300_mark_atom_dirty(r300, &r300->aa_state);
-
-    /* Resolve the surface. */
-    /* XXX: y1 < 0 ==> Y flip */
-    r300->context.clear_render_target(pipe,
-                                      srcsurf, &color, 0, 0,
-                                      info->dst.x1 - info->dst.x0,
-                                      info->dst.y1 - info->dst.y0);
-
-    /* Disable AA resolve. */
-    aa->dest = NULL;
-    aa->aaresolve_ctl = 0;
-    r300->aa_state.size = 4;
-    r300_mark_atom_dirty(r300, &r300->aa_state);
-
-    pipe_surface_reference(&srcsurf, NULL);
-    pipe_surface_reference(&dstsurf, NULL);
-}
-#endif
-
 void r300_init_render_functions(struct r300_context *r300)
 {
     /* Set draw functions based on presence of HW TCL. */
index db8f171786d00ddc73b29b0fcd08495a91d4765f..762f6072e0c6619e8b44a9a93baafc436952381d 100644 (file)
@@ -392,17 +392,34 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
         case 1:
             break;
         case 2:
-        case 3:
         case 4:
         case 6:
-            return FALSE;
-#if 0
-            if (usage != PIPE_BIND_RENDER_TARGET ||
+            /* We need DRM 2.8.0. */
+            if (!drm_2_8_0) {
+                return FALSE;
+            }
+            /* Only support R500, because I didn't test older chipsets,
+             * but MSAA should work there too. */
+            if (!is_r500 && !debug_get_bool_option("RADEON_MSAA", FALSE)) {
+                return FALSE;
+            }
+            /* No texturing and scanout. */
+            if (usage & (PIPE_BIND_SAMPLER_VIEW |
+                         PIPE_BIND_DISPLAY_TARGET |
+                         PIPE_BIND_SCANOUT)) {
+                return FALSE;
+            }
+            /* Only allow depth/stencil, RGBA8, RGBA16F */
+            if (!util_format_is_depth_or_stencil(format) &&
                 !util_format_is_rgba8_variant(
-                    util_format_description(format))) {
+                    util_format_description(format)) &&
+                format != PIPE_FORMAT_R16G16B16A16_FLOAT) {
+                return FALSE;
+            }
+            /* RGBA16F AA is only supported on R500. */
+            if (format == PIPE_FORMAT_R16G16B16A16_FLOAT && !is_r500) {
                 return FALSE;
             }
-#endif
             break;
         default:
             return FALSE;
index a5f968310813bf4f603200ccb9483d47a435a2cf..1e7cff91f9a56f29659fb34451323a35167e2de2 100644 (file)
@@ -440,8 +440,27 @@ static void r300_bind_blend_state(struct pipe_context* pipe,
                                   void* state)
 {
     struct r300_context* r300 = r300_context(pipe);
+    struct r300_blend_state *blend  = (struct r300_blend_state*)state;
+    boolean last_alpha_to_one = r300->alpha_to_one;
+    boolean last_alpha_to_coverage = r300->alpha_to_coverage;
 
     UPDATE_STATE(state, r300->blend_state);
+
+    if (!blend)
+        return;
+
+    r300->alpha_to_one = blend->state.alpha_to_one;
+    r300->alpha_to_coverage = blend->state.alpha_to_coverage;
+
+    if (r300->alpha_to_one != last_alpha_to_one && r300->msaa_enable &&
+        r300->fs_status == FRAGMENT_SHADER_VALID) {
+        r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY;
+    }
+
+    if (r300->alpha_to_coverage != last_alpha_to_coverage &&
+        r300->msaa_enable) {
+        r300_mark_atom_dirty(r300, &r300->dsa_state);
+    }
 }
 
 /* Free blend state. */
@@ -553,13 +572,6 @@ static void r300_set_clip_state(struct pipe_context* pipe,
     }
 }
 
-static void
-r300_set_sample_mask(struct pipe_context *pipe,
-                     unsigned sample_mask)
-{
-}
-
-
 /* Create a new depth, stencil, and alpha state based on the CSO dsa state.
  *
  * This contains the depth buffer, stencil buffer, alpha test, and such.
@@ -816,6 +828,25 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
     /* The size of the rest of atoms stays the same. */
 }
 
+static unsigned r300_get_num_samples(struct r300_context *r300)
+{
+    struct pipe_framebuffer_state* fb =
+            (struct pipe_framebuffer_state*)r300->fb_state.state;
+    unsigned num_samples;
+
+    if (fb->nr_cbufs)
+        num_samples = fb->cbufs[0]->texture->nr_samples;
+    else if (fb->zsbuf)
+        num_samples = fb->zsbuf->texture->nr_samples;
+    else
+        num_samples = 1;
+
+    if (!num_samples)
+        num_samples = 1;
+
+    return num_samples;
+}
+
 static void
 r300_set_framebuffer_state(struct pipe_context* pipe,
                            const struct pipe_framebuffer_state* state)
@@ -911,22 +942,22 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
         }
     }
 
-    /* Set up AA config. */
-    if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) {
-        aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE;
+    r300->num_samples = r300_get_num_samples(r300);
 
-        switch (state->cbufs[0]->texture->nr_samples) {
+    /* Set up AA config. */
+    if (r300->num_samples > 1) {
+        switch (r300->num_samples) {
         case 2:
-            aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
-            break;
-        case 3:
-            aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3;
+            aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE |
+                            R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
             break;
         case 4:
-            aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
+            aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE |
+                            R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
             break;
         case 6:
-            aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
+            aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE |
+                            R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
             break;
         }
     } else {
@@ -1251,6 +1282,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
     struct r300_rs_state* rs = (struct r300_rs_state*)state;
     int last_sprite_coord_enable = r300->sprite_coord_enable;
     boolean last_two_sided_color = r300->two_sided_color;
+    boolean last_msaa_enable = r300->msaa_enable;
 
     if (r300->draw && rs) {
         draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state);
@@ -1260,10 +1292,12 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
         r300->polygon_offset_enabled = rs->polygon_offset_enable;
         r300->sprite_coord_enable = rs->rs.sprite_coord_enable;
         r300->two_sided_color = rs->rs.light_twoside;
+        r300->msaa_enable = rs->rs.multisample;
     } else {
         r300->polygon_offset_enabled = FALSE;
         r300->sprite_coord_enable = 0;
         r300->two_sided_color = FALSE;
+        r300->msaa_enable = FALSE;
     }
 
     UPDATE_STATE(state, r300->rs_state);
@@ -1273,6 +1307,19 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
         last_two_sided_color != r300->two_sided_color) {
         r300_mark_atom_dirty(r300, &r300->rs_block_state);
     }
+
+    if (last_msaa_enable != r300->msaa_enable) {
+        r300_mark_atom_dirty(r300, &r300->fb_state_pipelined);
+
+        if (r300->alpha_to_coverage) {
+            r300_mark_atom_dirty(r300, &r300->dsa_state);
+        }
+
+        if (r300->alpha_to_one &&
+            r300->fs_status == FRAGMENT_SHADER_VALID) {
+            r300->fs_status = FRAGMENT_SHADER_MAYBE_DIRTY;
+        }
+    }
 }
 
 /* Free rasterizer state. */
@@ -1542,6 +1589,16 @@ r300_sampler_view_destroy(struct pipe_context *pipe,
    FREE(view);
 }
 
+static void r300_set_sample_mask(struct pipe_context *pipe,
+                                 unsigned mask)
+{
+    struct r300_context* r300 = r300_context(pipe);
+    /* Record the requested mask in the sample_mask atom's state storage. */
+    *((unsigned*)r300->sample_mask.state) = mask;
+    /* Mark the atom dirty; NOTE(review): presumably re-emitted on the next draw. */
+    r300_mark_atom_dirty(r300, &r300->sample_mask);
+}
+
 static void r300_set_scissor_state(struct pipe_context* pipe,
                                    const struct pipe_scissor_state* state)
 {
index 568558f0adb34da435dab165dcf7e0d57ec4413a..e18dcf8e1cc7be6f774fcb7455ec93e13fbb4a5f 100644 (file)
@@ -955,10 +955,6 @@ r300_texture_create_object(struct r300_screen *rscreen,
     struct radeon_winsys *rws = rscreen->rws;
     struct r300_resource *tex = NULL;
 
-    if (base->nr_samples > 1) {
-        goto fail;
-    }
-
     tex = CALLOC_STRUCT(r300_resource);
     if (!tex) {
         goto fail;
index 04d439bcc1f854c820ed07917368879aa93f35bc..9493eb19c289a85e204fff5b1cdc5b112d3980d7 100644 (file)
@@ -56,7 +56,6 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
         }
     };
 
-    static const unsigned aa_block[2] = {4, 8};
     unsigned tile = 0;
     unsigned pixsize = util_format_get_blocksize(format);
 
@@ -65,22 +64,14 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
     assert(pixsize <= 16);
     assert(dim <= DIM_HEIGHT);
 
-    if (num_samples > 1) {
-        /* Multisampled textures have their own alignment scheme. */
-        if (pixsize == 4)
-            tile = aa_block[dim];
-        /* XXX FP16 AA. */
-    } else {
-        /* Standard alignment. */
-        tile = table[macrotile][util_logbase2(pixsize)][microtile][dim];
-        if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) {
-            int align;
-            int h_tile;
-            h_tile = table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT];
-            align = 64 / (pixsize * h_tile);
-            if (tile < align)
-                tile = align;
-        }
+    tile = table[macrotile][util_logbase2(pixsize)][microtile][dim];
+    if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) {
+        int align;
+        int h_tile;
+        h_tile = table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT];
+        align = 64 / (pixsize * h_tile);
+        if (tile < align)
+            tile = align;
     }
 
     assert(tile);
@@ -95,6 +86,10 @@ static boolean r300_texture_macro_switch(struct r300_resource *tex,
 {
     unsigned tile, texdim;
 
+    if (tex->b.b.nr_samples > 1) {
+        return TRUE;
+    }
+
     tile = r300_get_pixel_alignment(tex->b.b.format, tex->b.b.nr_samples,
                                     tex->tex.microtile, RADEON_LAYOUT_TILED, dim, 0);
     if (dim == DIM_WIDTH) {
@@ -248,7 +243,7 @@ static void r300_setup_miptree(struct r300_screen *screen,
 
         layer_size = stride * nblocksy;
 
-        if (base->nr_samples) {
+        if (base->nr_samples > 1) {
             layer_size *= base->nr_samples;
         }
 
@@ -423,6 +418,12 @@ static void r300_setup_tiling(struct r300_screen *screen,
     boolean is_zb = util_format_is_depth_or_stencil(format);
     boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING);
 
+    if (tex->b.b.nr_samples > 1) {
+        tex->tex.microtile = RADEON_LAYOUT_TILED;
+        tex->tex.macrotile[0] = RADEON_LAYOUT_TILED;
+        return;
+    }
+
     tex->tex.microtile = RADEON_LAYOUT_LINEAR;
     tex->tex.macrotile[0] = RADEON_LAYOUT_LINEAR;
 
index 6ad08c6740e20392dd9cf3af5e0217dc6cf9fa71..436b30445b273269c75c96d5dae4fc3d7a6e9502 100644 (file)
@@ -52,11 +52,31 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx,
                                          struct r300_transfer *r300transfer)
 {
     struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer;
-    struct pipe_resource *tex = transfer->resource;
+    struct pipe_resource *src = transfer->resource; /* resource being transferred */
+    struct pipe_resource *dst = &r300transfer->linear_texture->b.b; /* the transfer's linear texture */
 
-    ctx->resource_copy_region(ctx, &r300transfer->linear_texture->b.b, 0,
-                              0, 0, 0,
-                              tex, transfer->level, &transfer->box);
+    if (src->nr_samples <= 1) { /* not multisampled: a plain region copy suffices */
+        ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0,
+                                  src, transfer->level, &transfer->box);
+    } else {
+        /* Multisampled source: resolve the transfer box into dst with a blit. */
+        struct pipe_blit_info blit;
+        /* Fields not assigned below (e.g. the dst box origin) stay zero from the memset. */
+        memset(&blit, 0, sizeof(blit));
+        blit.src.resource = src;
+        blit.src.format = src->format;
+        blit.src.level = transfer->level;
+        blit.src.box = transfer->box;
+        blit.dst.resource = dst;
+        blit.dst.format = dst->format;
+        blit.dst.box.width = transfer->box.width; /* dst extent mirrors the source box */
+        blit.dst.box.height = transfer->box.height;
+        blit.dst.box.depth = transfer->box.depth;
+        blit.mask = PIPE_MASK_RGBA;
+        blit.filter = PIPE_TEX_FILTER_NEAREST; /* src and dst extents match, so no scaling */
+
+        ctx->blit(ctx, &blit);
+    }
 }
 
 /* Copy a detiled texture to a tiled one. */