r600g: implement texturing with 8x MSAA compressed surfaces for Evergreen
author Marek Olšák <maraeo@gmail.com>
Fri, 12 Oct 2012 16:46:32 +0000 (18:46 +0200)
committer Marek Olšák <maraeo@gmail.com>
Mon, 29 Oct 2012 11:51:41 +0000 (12:51 +0100)
The 2x and 4x MSAA cases are completely broken. The ldfptr instruction returns
garbage there.

The 8x MSAA case is broken on Cayman, though at least the result looks somewhat
correct.

Only the 8x MSAA case works on Evergreen and is enabled.

13 files changed:
src/gallium/auxiliary/util/u_blitter.c
src/gallium/auxiliary/util/u_blitter.h
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/evergreend.h
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_blit.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_sq.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c

index 4d6cdd7a244a557288c9967fd37a5dbaebacd986..f4ac4aa868551fa440c749afa9b6640e984f4a45 100644 (file)
@@ -359,6 +359,14 @@ void util_blitter_destroy(struct blitter_context *blitter)
    FREE(ctx);
 }
 
+void util_blitter_set_texture_multisample(struct blitter_context *blitter,
+                                          boolean supported)
+{
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+   /* Store the caller's value in the private context's has_texture_multisample flag. */
+   ctx->has_texture_multisample = supported;
+}
+
 static void blitter_set_running_flag(struct blitter_context_priv *ctx)
 {
    if (ctx->base.running) {
index de063937793cf081a9543d73ae10c3563b6433b3..c49faaad717a9cd227d70949cf497d5fbaeb5c0d 100644 (file)
@@ -135,6 +135,12 @@ struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter)
    return blitter->pipe;
 }
 
+/**
+ * Override PIPE_CAP_TEXTURE_MULTISAMPLE as reported by the driver.
+ */
+void util_blitter_set_texture_multisample(struct blitter_context *blitter,
+                                          boolean supported);
+
 /* The default function to draw a rectangle. This can only be used
  * inside of the draw_rectangle callback if the driver overrides it. */
 void util_blitter_draw_rectangle(struct blitter_context *blitter,
index 96e246a6e68ad46d174408010b378f37e082bcf8..17b7e9d2c72fdcda2f88114b86afeb6a0b90fede 100644 (file)
@@ -633,7 +633,7 @@ boolean evergreen_is_format_supported(struct pipe_screen *screen,
                return FALSE;
 
        if (sample_count > 1) {
-               if (rscreen->info.drm_minor < 19)
+               if (!rscreen->has_msaa)
                        return FALSE;
 
                switch (sample_count) {
@@ -1074,11 +1074,24 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
                                       S_030004_TEX_DEPTH(depth - 1) |
                                       S_030004_ARRAY_MODE(array_mode));
        view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
-       if (state->u.tex.last_level && texture->nr_samples <= 1) {
+
+       /* TEX_RESOURCE_WORD3.MIP_ADDRESS */
+       if (texture->nr_samples > 1 && rscreen->msaa_texture_support == MSAA_TEXTURE_COMPRESSED) {
+               /* XXX the 2x and 4x cases are broken. */
+               if (tmp->is_depth || tmp->resource.b.b.nr_samples != 8) {
+                       /* disable FMASK (0 = disabled) */
+                       view->tex_resource_words[3] = 0;
+                       view->skip_mip_address_reloc = true;
+               } else {
+                       /* FMASK should be in MIP_ADDRESS for multisample textures */
+                       view->tex_resource_words[3] = (tmp->fmask_offset + r600_resource_va(ctx->screen, texture)) >> 8;
+               }
+       } else if (state->u.tex.last_level && texture->nr_samples <= 1) {
                view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8;
        } else {
                view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
        }
+
        view->tex_resource_words[4] = (word4 |
                                       S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
                                       S_030010_ENDIAN_SWAP(endian));
@@ -1582,9 +1595,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
                        rctx->framebuffer.export_16bpc = false;
                }
 
-               /* Cayman can fetch from a compressed MSAA colorbuffer,
-                * so it's pointless to track them. */
-               if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) {
+               if (rtex->fmask_size && rtex->cmask_size) {
                        rctx->framebuffer.compressed_cb_mask |= 1 << i;
                }
        }
@@ -2258,13 +2269,15 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
                r600_write_value(cs, (resource_id_base + resource_index) * 8);
                r600_write_array(cs, 8, rview->tex_resource_words);
 
-               /* XXX The kernel needs two relocations. This is stupid. */
                reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
                                              RADEON_USAGE_READ);
                r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
                r600_write_value(cs, reloc);
-               r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
-               r600_write_value(cs, reloc);
+
+               if (!rview->skip_mip_address_reloc) {
+                       r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+                       r600_write_value(cs, reloc);
+               }
        }
        state->dirty_mask = 0;
 }
@@ -3345,6 +3358,16 @@ void *evergreen_create_decompress_blend(struct r600_context *rctx)
        return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_DECOMPRESS);
 }
 
+void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx)
+{
+       struct pipe_blend_state blend;
+       /* Zeroed template with RT0 colormask 0xf, created in CB mode V_028808_CB_FMASK_DECOMPRESS. */
+       memset(&blend, 0, sizeof(blend));
+       blend.independent_blend_enable = true;
+       blend.rt[0].colormask = 0xf;
+       return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_FMASK_DECOMPRESS);
+}
+
 void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
 {
        struct pipe_depth_stencil_alpha_state dsa = {{0}};
index 98df83de9182fea8ab4e7f368f4d182e7856b931..edb1a55dc8a346ee75ee8d0fbc22ccbb70bc48c3 100644 (file)
 #define      V_028808_CB_ELIMINATE_FAST_CLEAR          0x00000002
 #define      V_028808_CB_RESOLVE                       0x00000003
 #define      V_028808_CB_DECOMPRESS                    0x00000004
-#define      V_028808_CB_FASK_DECOMPRESS               0x00000005
+#define      V_028808_CB_FMASK_DECOMPRESS              0x00000005
 #define   S_028808_ROP3(x)                             (((x) & 0xFF) << 16)
 #define   G_028808_ROP3(x)                             (((x) >> 16) & 0xFF)
 #define   C_028808_ROP3                                0xFF00FFFF
index 51a2e4ee9e592f0ab9ba9ffb90d472daeacc97d7..f04a92062f62b1f8ffc9a556913330141cc38941 100644 (file)
@@ -255,7 +255,10 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
        return tex;
 }
 
-void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family)
+void r600_bytecode_init(struct r600_bytecode *bc,
+                       enum chip_class chip_class,
+                       enum radeon_family family,
+                       enum r600_msaa_texture_mode msaa_texture_mode)
 {
        if ((chip_class == R600) &&
            (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) {
@@ -268,6 +271,7 @@ void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, en
 
        LIST_INITHEAD(&bc->cf);
        bc->chip_class = chip_class;
+       bc->msaa_texture_mode = msaa_texture_mode;
 }
 
 static int r600_bytecode_add_cf(struct r600_bytecode *bc)
@@ -1736,6 +1740,7 @@ static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecod
 static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id)
 {
        bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) |
+                            EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) |
                                S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
                                S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
                                S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
@@ -2766,7 +2771,8 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
        assert(count < 32);
 
        memset(&bc, 0, sizeof(bc));
-       r600_bytecode_init(&bc, rctx->chip_class, rctx->family);
+       r600_bytecode_init(&bc, rctx->chip_class, rctx->family,
+                          rctx->screen->msaa_texture_support);
 
        for (i = 0; i < count; i++) {
                if (elements[i].instance_divisor > 1) {
index 8a9f3189be060f3fcb9e64ef8c117599bbd6ae07..2c7db2cefd792cda604d3205ee3047147acc3591 100644 (file)
@@ -62,6 +62,7 @@ struct r600_bytecode_alu {
 struct r600_bytecode_tex {
        struct list_head                list;
        unsigned                        inst;
+       unsigned                        inst_mod;
        unsigned                        resource_id;
        unsigned                        src_gpr;
        unsigned                        src_rel;
@@ -195,6 +196,7 @@ struct r600_cf_callstack {
 
 struct r600_bytecode {
        enum chip_class                 chip_class;
+       enum r600_msaa_texture_mode     msaa_texture_mode;
        int                             type;
        struct list_head                cf;
        struct r600_bytecode_cf         *cf_last;
@@ -219,7 +221,10 @@ struct r600_bytecode {
 int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
 
 /* r600_asm.c */
-void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family);
+void r600_bytecode_init(struct r600_bytecode *bc,
+                       enum chip_class chip_class,
+                       enum radeon_family family,
+                       enum r600_msaa_texture_mode msaa_texture_mode);
 void r600_bytecode_clear(struct r600_bytecode *bc);
 int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu);
 int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx);
index 8597b8dfcf72399b5a2ebe22e11360f03189398e..a19248da3a2c4955b8f22b4c4540559964f884c5 100644 (file)
@@ -252,12 +252,29 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        unsigned layer, level, checked_last_layer, max_layer;
-
-       assert(rctx->chip_class != CAYMAN);
+       void *blend_decompress;
 
        if (!rtex->dirty_level_mask)
                return;
 
+       switch (rctx->screen->msaa_texture_support) {
+       case MSAA_TEXTURE_DECOMPRESSED:
+               blend_decompress = rctx->custom_blend_decompress;
+               break;
+       case MSAA_TEXTURE_COMPRESSED:
+               /* XXX the 2x and 4x cases are broken. */
+               if (rtex->resource.b.b.nr_samples == 8)
+                       blend_decompress = rctx->custom_blend_fmask_decompress;
+               else
+                       blend_decompress = rctx->custom_blend_decompress;
+               break;
+       case MSAA_TEXTURE_SAMPLE_ZERO:
+       default:
+               /* Nothing to do. */
+               rtex->dirty_level_mask = 0;
+               return;
+       }
+
        for (level = first_level; level <= last_level; level++) {
                if (!(rtex->dirty_level_mask & (1 << level)))
                        continue;
@@ -278,8 +295,7 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
                        cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);
 
                        r600_blitter_begin(ctx, R600_DECOMPRESS);
-                       util_blitter_custom_color(rctx->blitter, cbsurf,
-                                                 rctx->custom_blend_decompress);
+                       util_blitter_custom_color(rctx->blitter, cbsurf, blend_decompress);
                        r600_blitter_end(ctx);
 
                        pipe_surface_reference(&cbsurf, NULL);
@@ -299,13 +315,6 @@ void r600_decompress_color_textures(struct r600_context *rctx,
        unsigned i;
        unsigned mask = textures->compressed_colortex_mask;
 
-       /* Cayman cannot decompress an MSAA colorbuffer,
-        * but it can read it compressed, so skip this. */
-       assert(rctx->chip_class != CAYMAN);
-       if (rctx->chip_class == CAYMAN) {
-               return;
-       }
-
        while (mask) {
                struct pipe_sampler_view *view;
                struct r600_texture *tex;
@@ -333,7 +342,6 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
                                        unsigned level,
                                        unsigned first_layer, unsigned last_layer)
 {
-       struct r600_context *rctx = (struct r600_context *)ctx;
        struct r600_texture *rtex = (struct r600_texture*)tex;
 
        if (rtex->is_depth && !rtex->is_flushing_texture) {
@@ -344,7 +352,7 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
                                           level, level,
                                           first_layer, last_layer,
                                           0, u_max_sample(tex));
-       } else if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) {
+       } else if (rtex->fmask_size && rtex->cmask_size) {
                r600_blit_decompress_color(ctx, rtex, level, level,
                                           first_layer, last_layer);
        }
@@ -459,6 +467,7 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
        struct pipe_sampler_view src_templ, *src_view;
        unsigned dst_width, dst_height, src_width0, src_height0, src_widthFL, src_heightFL;
        struct pipe_box sbox;
+       bool copy_all_samples;
 
        /* Handle buffers first. */
        if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
@@ -558,16 +567,15 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
                                                           src_widthFL, src_heightFL);
        }
 
+       copy_all_samples = rctx->screen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
+
        /* Copy. */
-       /* XXX Multisample texturing is unimplemented on Cayman. In the meantime,
-        * copy only the first sample (which is the only one that is uncompressed
-        * and therefore doesn't return garbage). */
        r600_blitter_begin(ctx, R600_COPY_TEXTURE);
        util_blitter_blit_generic(rctx->blitter, dst_view, dstx, dsty,
                                  abs(src_box->width), abs(src_box->height),
                                  src_view, src_box, src_width0, src_height0,
                                  PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
-                                 rctx->chip_class != CAYMAN);
+                                 copy_all_samples);
        r600_blitter_end(ctx);
 
        pipe_surface_reference(&dst_view, NULL);
index 916fa381a33162df24e5bde3fdbf7fa419811bc3..7a1e135355335f60249ad3148a4eaf0a1767f01a 100644 (file)
@@ -171,6 +171,9 @@ static void r600_destroy_context(struct pipe_context *context)
        if (rctx->custom_blend_decompress) {
                rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_decompress);
        }
+       if (rctx->custom_blend_fmask_decompress) {
+               rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_fmask_decompress);
+       }
        util_unreference_framebuffer_state(&rctx->framebuffer.state);
 
        r600_context_fini(rctx);
@@ -264,6 +267,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
                rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
                rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx);
                rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx);
+               rctx->custom_blend_fmask_decompress = evergreen_create_fmask_decompress_blend(rctx);
                rctx->has_vertex_cache = !(rctx->family == CHIP_CEDAR ||
                                           rctx->family == CHIP_PALM ||
                                           rctx->family == CHIP_SUMO ||
@@ -289,6 +293,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
        rctx->blitter = util_blitter_create(&rctx->context);
        if (rctx->blitter == NULL)
                goto fail;
+       util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa);
        rctx->blitter->draw_rectangle = r600_draw_rectangle;
 
        r600_begin_new_cs(rctx);
@@ -393,7 +398,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_COMPUTE:
        case PIPE_CAP_START_INSTANCE:
        case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
-        case PIPE_CAP_TEXTURE_MULTISAMPLE:
                return 1;
 
        case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -402,6 +406,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_GLSL_FEATURE_LEVEL:
                return 130;
 
+       case PIPE_CAP_TEXTURE_MULTISAMPLE:
+               return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
+
        /* Supported except the original R600. */
        case PIPE_CAP_INDEP_BLEND_ENABLE:
        case PIPE_CAP_INDEP_BLEND_FUNC:
@@ -947,6 +954,26 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
                break;
        }
 
+       /* MSAA support. */
+       switch (rscreen->chip_class) {
+       case R600:
+       case R700:
+               rscreen->has_msaa = rscreen->info.drm_minor >= 22;
+               rscreen->msaa_texture_support = MSAA_TEXTURE_DECOMPRESSED;
+               break;
+       case EVERGREEN:
+               rscreen->has_msaa = rscreen->info.drm_minor >= 19;
+               rscreen->msaa_texture_support =
+                       rscreen->info.drm_minor >= 24 ? MSAA_TEXTURE_COMPRESSED :
+                                                       MSAA_TEXTURE_DECOMPRESSED;
+               break;
+       case CAYMAN:
+               rscreen->has_msaa = rscreen->info.drm_minor >= 19;
+               /* We should be able to read compressed MSAA textures, but it doesn't work. */
+               rscreen->msaa_texture_support = MSAA_TEXTURE_SAMPLE_ZERO;
+               break;
+       }
+
        if (r600_init_tiling(rscreen)) {
                FREE(rscreen);
                return NULL;
index 17dab7f23d520def256485eaed8f549b73f49a80..238ab1676f47fe413132ff2e9a4266f642910f56 100644 (file)
@@ -184,6 +184,22 @@ struct r600_pipe_fences {
        pipe_mutex                      mutex;
 };
 
+enum r600_msaa_texture_mode { /* how shaders may fetch multisampled color textures */
+       /* The hw can fetch the first sample only (no decompression available).
+        * MSAA texturing is therefore not fully implemented in this mode. */
+       MSAA_TEXTURE_SAMPLE_ZERO,
+
+       /* The hw can fetch MSAA textures after they have been decompressed.
+        * Supported families: R600, R700, Evergreen.
+        * Cayman cannot use this, because it cannot do the decompression. */
+       MSAA_TEXTURE_DECOMPRESSED,
+
+       /* The hw can fetch compressed MSAA textures, which means shaders can
+        * read resolved FMASK. This yields the best performance.
+        * Supported families: Evergreen, Cayman. NOTE(review): Cayman is set to SAMPLE_ZERO for now. */
+       MSAA_TEXTURE_COMPRESSED
+};
+
 struct r600_screen {
        struct pipe_screen              screen;
        struct radeon_winsys            *ws;
@@ -191,6 +207,8 @@ struct r600_screen {
        enum chip_class                 chip_class;
        struct radeon_info              info;
        bool                            has_streamout;
+       bool                            has_msaa;
+       enum r600_msaa_texture_mode     msaa_texture_support;
        struct r600_tiling_info         tiling_info;
        struct r600_pipe_fences         fences;
 
@@ -205,6 +223,7 @@ struct r600_pipe_sampler_view {
        struct pipe_sampler_view        base;
        struct r600_resource            *tex_resource;
        uint32_t                        tex_resource_words[8];
+       bool                            skip_mip_address_reloc;
 };
 
 struct r600_rasterizer_state {
@@ -372,6 +391,7 @@ struct r600_context {
        void                            *custom_dsa_flush;
        void                            *custom_blend_resolve;
        void                            *custom_blend_decompress;
+       void                            *custom_blend_fmask_decompress;
        /* With rasterizer discard, there doesn't have to be a pixel shader.
         * In that case, we bind this one: */
        void                            *dummy_pixel_shader;
@@ -525,6 +545,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
 void *evergreen_create_db_flush_dsa(struct r600_context *rctx);
 void *evergreen_create_resolve_blend(struct r600_context *rctx);
 void *evergreen_create_decompress_blend(struct r600_context *rctx);
+void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx);
 boolean evergreen_is_format_supported(struct pipe_screen *screen,
                                      enum pipe_format format,
                                      enum pipe_texture_target target,
index c56efda534747c9966aa759dd7116eb18866299b..0b586f3aedb59087003b2e527b60045b9e05aa92 100644 (file)
@@ -1180,7 +1180,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
        ctx.shader = shader;
        ctx.native_integers = true;
 
-       r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family);
+       r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family,
+                          rscreen->msaa_texture_support);
        ctx.tokens = tokens;
        tgsi_scan_shader(tokens, &ctx.info);
        tgsi_parse_init(&ctx.parse, tokens);
@@ -3796,10 +3797,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
        unsigned src_gpr;
        int r, i, j;
        int opcode;
+       bool read_compressed_msaa = ctx->bc->msaa_texture_mode == MSAA_TEXTURE_COMPRESSED &&
+                                   inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
+                                   (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
+                                    inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
        /* Texture fetch instructions can only use gprs as source.
         * Also they cannot negate the source or take the absolute value */
-       const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
-                                             tgsi_tex_src_requires_loading(ctx, 0);
+       const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
+                                              tgsi_tex_src_requires_loading(ctx, 0)) ||
+                                            read_compressed_msaa;
        boolean src_loaded = FALSE;
        unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
        uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
@@ -4070,6 +4076,127 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                src_gpr = ctx->temp_reg;
        }
 
+       /* Obtain the sample index for reading a compressed MSAA color texture.
+        * To read the FMASK, we use the ldfptr instruction, which tells us
+        * where the samples are stored.
+        * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
+        * which is the identity mapping. Each nibble says which physical sample
+        * should be fetched to get that sample.
+        *
+        * src.w (sample_chan) holds the sample index. It is rewritten like this:
+        *   src.w = (ldfptr() >> (src.w * 4)) & 0xF;
+        * Then fetch the texel with src.
+        */
+       if (read_compressed_msaa) {
+               unsigned sample_chan = 3; /* .w for both 2D_MSAA and 2D_ARRAY_MSAA; 4 is not a valid channel */
+               unsigned temp = r600_get_temp(ctx);
+               assert(src_loaded);
+
+               /* temp.w = ldfptr() */
+               memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+               tex.inst = SQ_TEX_INST_LD;
+               tex.inst_mod = 1; /* to indicate this is ldfptr */
+               tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+               tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+               tex.src_gpr = src_gpr;
+               tex.dst_gpr = temp;
+               tex.dst_sel_x = 7; /* mask out these components */
+               tex.dst_sel_y = 7;
+               tex.dst_sel_z = 7;
+               tex.dst_sel_w = 0; /* store X */
+               tex.src_sel_x = 0;
+               tex.src_sel_y = 1;
+               tex.src_sel_z = 2;
+               tex.src_sel_w = 3;
+               tex.offset_x = offset_x;
+               tex.offset_y = offset_y;
+               tex.offset_z = offset_z;
+               r = r600_bytecode_add_tex(ctx->bc, &tex);
+               if (r)
+                       return r;
+
+               /* temp.x = sample_index*4 (Cayman issues MULLO_INT in all 4 slots, writing only X) */
+               if (ctx->bc->chip_class == CAYMAN) {
+                       for (i = 0 ; i < 4; i++) {
+                               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT);
+                               alu.src[0].sel = src_gpr;
+                               alu.src[0].chan = sample_chan;
+                               alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+                               alu.src[1].value = 4;
+                               alu.dst.sel = temp;
+                               alu.dst.chan = i;
+                               alu.dst.write = i == 0;
+                               if (i == 3)
+                                       alu.last = 1;
+                               r = r600_bytecode_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               } else {
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT);
+                       alu.src[0].sel = src_gpr;
+                       alu.src[0].chan = sample_chan;
+                       alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+                       alu.src[1].value = 4;
+                       alu.dst.sel = temp;
+                       alu.dst.chan = 0;
+                       alu.dst.write = 1;
+                       alu.last = 1;
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+
+               /* sample_index = temp.w >> temp.x */
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT);
+               alu.src[0].sel = temp;
+               alu.src[0].chan = 3;
+               alu.src[1].sel = temp;
+               alu.src[1].chan = 0;
+               alu.dst.sel = src_gpr;
+               alu.dst.chan = sample_chan;
+               alu.dst.write = 1;
+               alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+
+               /* sample_index & 0xF */
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
+               alu.src[0].sel = src_gpr;
+               alu.src[0].chan = sample_chan;
+               alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+               alu.src[1].value = 0xF;
+               alu.dst.sel = src_gpr;
+               alu.dst.chan = sample_chan;
+               alu.dst.write = 1;
+               alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+#if 0
+               /* visualize the FMASK */
+               for (i = 0; i < 4; i++) {
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+                       alu.src[0].sel = src_gpr;
+                       alu.src[0].chan = sample_chan;
+                       alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+                       alu.dst.chan = i;
+                       alu.dst.write = 1;
+                       alu.last = 1;
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+               return 0;
+#endif
+       }
+
        opcode = ctx->inst_info->r600_opcode;
        if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
            inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
index 4b2a19a07f75bc1310ca7a543db73728aa827509..587f88deb9ef78954435b50488aea81cc0ededab 100644 (file)
 #define   S_SQ_TEX_WORD0_BC_FRAC_MODE(x)                             (((x) & 0x1) << 5)
 #define   G_SQ_TEX_WORD0_BC_FRAC_MODE(x)                             (((x) >> 5) & 0x1)
 #define   C_SQ_TEX_WORD0_BC_FRAC_MODE                                0xFFFFFFDF
+#define   EG_S_SQ_TEX_WORD0_INST_MOD(x)                                 (((x) & 0x3) << 5)
+#define   EG_G_SQ_TEX_WORD0_INST_MOD(x)                                 (((x) >> 5) & 0x3)
+#define   EG_C_SQ_TEX_WORD0_INST_MOD                                    0xFFFFFF9F
 #define   S_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x)                         (((x) & 0x1) << 7)
 #define   G_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x)                         (((x) >> 7) & 0x1)
 #define   C_SQ_TEX_WORD0_FETCH_WHOLE_QUAD                            0xFFFFFF7F
index 7d07008f16d5af10e7662da146a769142a599666..1a8d55e8d3664ab488c82b1d007d072377d556e2 100644 (file)
@@ -585,7 +585,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
                return FALSE;
 
        if (sample_count > 1) {
-               if (rscreen->info.drm_minor < 22)
+               if (!rscreen->has_msaa)
                        return FALSE;
 
                /* R11G11B10 is broken on R6xx. */
@@ -1988,7 +1988,6 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
                r600_write_value(cs, (resource_id_base + resource_index) * 7);
                r600_write_array(cs, 7, rview->tex_resource_words);
 
-               /* XXX The kernel needs two relocations. This is stupid. */
                reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
                                              RADEON_USAGE_READ);
                r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
index 65985c7653db77811227a1e9fa9ac7e2e6db2a06..a4d3e461ef15ee7b2a18e537f5d11913552994f5 100644 (file)
@@ -593,8 +593,8 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
                                dst->views.compressed_depthtex_mask &= ~(1 << i);
                        }
 
-                       /* Track compressed colorbuffers for Evergreen (Cayman doesn't need this). */
-                       if (rctx->chip_class != CAYMAN && rtex->cmask_size && rtex->fmask_size) {
+                       /* Track compressed colorbuffers. */
+                       if (rtex->cmask_size && rtex->fmask_size) {
                                dst->views.compressed_colortex_mask |= 1 << i;
                        } else {
                                dst->views.compressed_colortex_mask &= ~(1 << i);