radeonsi: implement MSAA colorbuffer compression for rendering
author Marek Olšák <marek.olsak@amd.com>
Thu, 1 Aug 2013 23:44:15 +0000 (01:44 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 16 Aug 2013 23:48:25 +0000 (01:48 +0200)
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
src/gallium/drivers/radeonsi/r600_hw_context.c
src/gallium/drivers/radeonsi/r600_resource.h
src/gallium/drivers/radeonsi/r600_texture.c
src/gallium/drivers/radeonsi/radeonsi_pipe.h
src/gallium/drivers/radeonsi/si_commands.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/sid.h

index bc6ba0bd1f01b6056dfe4cff3f87d5a81fa1a718..55daa82994e881eea004ff8a4ef80dca2770b34c 100644 (file)
@@ -184,10 +184,13 @@ static void r600_flush_framebuffer(struct r600_context *ctx)
                                S_0085F0_CB7_DEST_BASE_ENA(1) |
                                S_0085F0_DB_ACTION_ENA(1) |
                                S_0085F0_DB_DEST_BASE_ENA(1));
+       si_cmd_flush_and_inv_cb_meta(pm4);
+
        si_pm4_emit(ctx, pm4);
        si_pm4_free_state(ctx, pm4, ~0);
 
        ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
+       ctx->flush_and_inv_cb_meta = false;
 }
 
 void si_context_flush(struct r600_context *ctx, unsigned flags)
index ca8121f32f9b4de3b062f19c6d4df7f5cd2f1734..e5dd36a1bab6499983577acd9cfc9ca020cda947 100644 (file)
@@ -40,6 +40,22 @@ struct r600_transfer {
        struct pipe_resource            *staging;
 };
 
+struct r600_fmask_info { /* FMASK layout of one texture; filled in by r600_texture_get_fmask_info() */
+       unsigned offset; /* where FMASK starts inside the texture's storage; set by r600_texture_allocate_fmask() */
+       unsigned size; /* bo_size the winsys surface_init() reports for the FMASK surface; stays 0 if that query fails */
+       unsigned alignment; /* MAX2(256, bo_alignment of the FMASK surface) */
+       unsigned bank_height; /* bankh of the FMASK surface; si_cb() programs its log2 into FMASK_BANK_HEIGHT */
+       unsigned slice_tile_max; /* (nblk_x * nblk_y) / 64 of level 0, minus 1 when nonzero; written to CB_COLOR0_FMASK_SLICE */
+       unsigned tile_mode_index; /* tiling_index[0] of the FMASK surface; written to FMASK_TILE_MODE_INDEX */
+};
+
+struct r600_cmask_info { /* CMASK layout of one texture; size/alignment/slice_tile_max come from si_texture_get_cmask_info() */
+       unsigned offset; /* where CMASK starts inside the texture's storage; set by r600_texture_allocate_cmask() */
+       unsigned size; /* array_size * per-slice bytes (each slice padded to the base alignment); 0 is treated as "no CMASK" */
+       unsigned alignment; /* MAX2(256, num_pipes * pipe_interleave_bytes) */
+       unsigned slice_tile_max; /* padded (width * height) / (128*128), minus 1 when nonzero; written to CB_COLOR0_CMASK_SLICE */
+};
+
 struct r600_texture {
        struct si_resource              resource;
 
@@ -48,12 +64,17 @@ struct r600_texture {
         * for the stencil buffer below. */
        enum pipe_format                real_format;
 
+       unsigned                        size;
        unsigned                        pitch_override;
        unsigned                        is_depth;
        unsigned                        dirty_level_mask; /* each bit says if that miplevel is dirty */
        struct r600_texture             *flushed_depth_texture;
        boolean                         is_flushing_texture;
        struct radeon_surface           surface;
+
+       /* Colorbuffer compression and fast clear. */
+       struct r600_fmask_info          fmask;
+       struct r600_cmask_info          cmask;
 };
 
 struct r600_surface {
index 185d987587dfa131008873d4622cfcd592faefd1..59e36045a8a2b0c6c5a6496cc88d6c35a21be604 100644 (file)
@@ -173,6 +173,9 @@ static int r600_setup_surface(struct pipe_screen *screen,
        if (r) {
                return r;
        }
+
+       rtex->size = rtex->surface.bo_size;
+
        if (pitch_in_bytes_override && pitch_in_bytes_override != rtex->surface.level[0].pitch_bytes) {
                /* old ddx on evergreen over estimate alignment for 1d, only 1 level
                 * for those
@@ -419,6 +422,116 @@ static const struct u_resource_vtbl r600_texture_vtbl =
 
 DEBUG_GET_ONCE_BOOL_OPTION(print_texdepth, "RADEON_PRINT_TEXDEPTH", FALSE);
 
+/* Fill *out with the FMASK layout for rtex at nr_samples samples (out->offset is left 0 for the caller to set). The number of samples can be specified independently of the texture. On an unsupported sample count or a surface_init failure an error is logged and *out stays all-zero. */
+static void r600_texture_get_fmask_info(struct r600_screen *rscreen,
+                                       struct r600_texture *rtex,
+                                       unsigned nr_samples,
+                                       struct r600_fmask_info *out)
+{
+       /* FMASK is allocated like an ordinary texture. */
+       struct radeon_surface fmask = rtex->surface; /* by-value copy: rtex->surface itself is not modified here */
+
+       memset(out, 0, sizeof(*out)); /* every early return below therefore leaves *out zeroed */
+
+       fmask.bo_alignment = 0; /* reset in the copy; read back after surface_init below */
+       fmask.bo_size = 0; /* reset in the copy; read back after surface_init below */
+       fmask.nsamples = 1; /* the FMASK surface itself is described as single-sampled */
+       fmask.flags |= RADEON_SURF_FMASK | RADEON_SURF_HAS_TILE_MODE_INDEX;
+
+       switch (nr_samples) { /* choose fmask.bpe from the requested sample count; only 2, 4 and 8 are accepted */
+       case 2:
+       case 4:
+               fmask.bpe = 1;
+               break;
+       case 8:
+               fmask.bpe = 4;
+               break;
+       default:
+               R600_ERR("Invalid sample count for FMASK allocation.\n");
+               return;
+       }
+
+       if (rscreen->ws->surface_init(rscreen->ws, &fmask)) { /* a nonzero return is treated as failure */
+               R600_ERR("Got error in surface_init while allocating FMASK.\n");
+               return;
+       }
+
+       assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);
+
+       out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64; /* level-0 block count divided by 64 */
+       if (out->slice_tile_max) /* stored as count-minus-one, but never wrapped below 0 */
+               out->slice_tile_max -= 1;
+
+       out->tile_mode_index = fmask.tiling_index[0];
+       out->bank_height = fmask.bankh;
+       out->alignment = MAX2(256, fmask.bo_alignment); /* never less than 256 */
+       out->size = fmask.bo_size;
+}
+/* Query the FMASK layout for rtex's own sample count and reserve room for it at the aligned end of rtex->size. */
+static void r600_texture_allocate_fmask(struct r600_screen *rscreen,
+                                       struct r600_texture *rtex)
+{
+       r600_texture_get_fmask_info(rscreen, rtex,
+                                   rtex->resource.b.b.nr_samples, &rtex->fmask);
+
+       rtex->fmask.offset = align(rtex->size, rtex->fmask.alignment); /* FMASK starts after everything already counted in rtex->size */
+       rtex->size = rtex->fmask.offset + rtex->fmask.size; /* grow the total so later allocations land after FMASK */
+}
+/* Compute CMASK slice_tile_max, alignment and size for rtex from the screen's tiling info. out->offset is not written here. For a channel count other than 2, 4 or 8 this asserts and returns without writing *out. */
+static void si_texture_get_cmask_info(struct r600_screen *rscreen,
+                                     struct r600_texture *rtex,
+                                     struct r600_cmask_info *out)
+{
+       unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
+       unsigned num_pipes = rscreen->tiling_info.num_channels;
+       unsigned cl_width, cl_height; /* NOTE(review): presumably cache-line dimensions in units of 8x8 pixel tiles - confirm */
+
+       switch (num_pipes) { /* cl_width/cl_height are fixed per supported pipe count */
+       case 2:
+               cl_width = 32;
+               cl_height = 16;
+               break;
+       case 4:
+               cl_width = 32;
+               cl_height = 32;
+               break;
+       case 8:
+               cl_width = 64;
+               cl_height = 32;
+               break;
+       default:
+               assert(0);
+               return;
+       }
+
+       unsigned base_align = num_pipes * pipe_interleave_bytes;
+
+       unsigned width = align(rtex->surface.npix_x, cl_width*8); /* pad the surface width to a multiple of cl_width*8 */
+       unsigned height = align(rtex->surface.npix_y, cl_height*8); /* pad the surface height to a multiple of cl_height*8 */
+       unsigned slice_elements = (width * height) / (8*8); /* one CMASK element per 8x8 block of the padded slice */
+
+       /* Each element of CMASK is a nibble. */
+       unsigned slice_bytes = slice_elements / 2; /* two nibbles per byte */
+
+       out->slice_tile_max = (width * height) / (128*128); /* number of 128x128 blocks in the padded slice */
+       if (out->slice_tile_max) /* stored as count-minus-one, but never wrapped below 0 */
+               out->slice_tile_max -= 1;
+
+       out->alignment = MAX2(256, base_align); /* never less than 256 */
+       out->size = rtex->surface.array_size * align(slice_bytes, base_align); /* every array slice is padded to base_align */
+}
+/* Query the CMASK layout for rtex and, when its size is nonzero, reserve room for it at the aligned end of rtex->size. */
+static void r600_texture_allocate_cmask(struct r600_screen *rscreen,
+                                       struct r600_texture *rtex)
+{
+       si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
+
+       if (rtex->cmask.size) { /* a zero size leaves offset and rtex->size untouched */
+               rtex->cmask.offset = align(rtex->size, rtex->cmask.alignment); /* CMASK starts after everything already counted in rtex->size */
+               rtex->size = rtex->cmask.offset + rtex->cmask.size;
+       }
+}
+
 static struct r600_texture *
 r600_texture_create_object(struct pipe_screen *screen,
                           const struct pipe_resource *base,
@@ -456,13 +569,23 @@ r600_texture_create_object(struct pipe_screen *screen,
                return NULL;
        }
 
+       if (base->nr_samples > 1 && !rtex->is_depth && !buf) {
+               r600_texture_allocate_fmask(rscreen, rtex);
+               r600_texture_allocate_cmask(rscreen, rtex);
+       }
+
+       if (!rtex->is_depth && base->nr_samples > 1 &&
+           (!rtex->fmask.size || !rtex->cmask.size)) {
+               FREE(rtex);
+               return NULL;
+       }
+
        /* Now create the backing buffer. */
        if (!buf && alloc_bo) {
                unsigned base_align = rtex->surface.bo_alignment;
-               unsigned size = rtex->surface.bo_size;
 
                base_align = rtex->surface.bo_alignment;
-               if (!si_init_resource(rscreen, resource, size, base_align, FALSE, base->usage)) {
+               if (!si_init_resource(rscreen, resource, rtex->size, base_align, FALSE, base->usage)) {
                        FREE(rtex);
                        return NULL;
                }
@@ -472,6 +595,12 @@ r600_texture_create_object(struct pipe_screen *screen,
                resource->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
        }
 
+       if (rtex->cmask.size) {
+               /* Initialize the cmask to 0xCC (= compressed state). */
+               char *map = rscreen->ws->buffer_map(resource->cs_buf, NULL, PIPE_TRANSFER_WRITE);
+               memset(map + rtex->cmask.offset, 0xCC, rtex->cmask.size);
+       }
+
        if (debug_get_option_print_texdepth() && rtex->is_depth) {
                printf("Texture: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
                       "blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
index b909323cdfdded89fd8f307d593d94dd8231dae7..d255837a9544d8f1819234c6f329ed1cd0ea74df 100644 (file)
@@ -217,6 +217,8 @@ struct r600_context {
        /* SI state handling */
        union si_state  queued;
        union si_state  emitted;
+
+       bool flush_and_inv_cb_meta;
 };
 
 /* r600_blit.c */
index bf9592493c3560f93b1017c4dc542a351a43c4aa..e498bd2d12ee7ddb58e534ec3ee9ef0792fe4b4d 100644 (file)
@@ -78,3 +78,12 @@ void si_cmd_surface_sync(struct si_pm4_state *pm4, uint32_t cp_coher_cntl)
                si_pm4_cmd_end(pm4, false);
        }
 }
+/* Append one PKT3_EVENT_WRITE command to pm4 whose payload selects the FLUSH_AND_INV_CB_META event with event index 0. */
+void si_cmd_flush_and_inv_cb_meta(struct si_pm4_state *pm4)
+{
+       si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
+       si_pm4_cmd_add(pm4,
+                      EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) |
+                      EVENT_INDEX(0));
+       si_pm4_cmd_end(pm4, false); /* NOTE(review): second argument is presumably the predicate flag - confirm against si_pm4_cmd_end() */
+}
index ff4a4b1dba4219771d56c93b7fc0ce81b8d1f510..496d3a4941db04c58553549b361c6a6ea633b996 100644 (file)
@@ -1852,8 +1852,22 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
 
        if (rtex->resource.b.b.nr_samples > 1) {
                unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
+
                color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
                                S_028C74_NUM_FRAGMENTS(log_samples);
+
+               if (rtex->fmask.size) {
+                       color_info |= S_028C70_COMPRESSION(1);
+                       unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
+
+                       /* due to a bug in the hw, FMASK_BANK_HEIGHT must be set on SI too */
+                       color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index) |
+                                       S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
+               }
+       }
+
+       if (rtex->cmask.size) {
+               color_info |= S_028C70_FAST_CLEAR(1);
        }
 
        offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture);
@@ -1875,6 +1889,19 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
        si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info);
        si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, color_attrib);
 
+       if (rtex->cmask.size) {
+               si_pm4_set_reg(pm4, R_028C7C_CB_COLOR0_CMASK + cb * 0x3C,
+                              offset + (rtex->cmask.offset >> 8));
+               si_pm4_set_reg(pm4, R_028C80_CB_COLOR0_CMASK_SLICE + cb * 0x3C,
+                              S_028C80_TILE_MAX(rtex->cmask.slice_tile_max));
+       }
+       if (rtex->fmask.size) {
+               si_pm4_set_reg(pm4, R_028C84_CB_COLOR0_FMASK + cb * 0x3C,
+                              offset + (rtex->fmask.offset >> 8));
+               si_pm4_set_reg(pm4, R_028C88_CB_COLOR0_FMASK_SLICE + cb * 0x3C,
+                              S_028C88_TILE_MAX(rtex->fmask.slice_tile_max));
+       }
+
        /* set CB_COLOR1_INFO for possible dual-src blending */
        if (state->nr_cbufs == 1) {
                assert(cb == 0);
@@ -2210,6 +2237,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                return;
 
        si_pm4_inval_fb_cache(pm4, state->nr_cbufs);
+       rctx->flush_and_inv_cb_meta = true;
 
        if (state->zsbuf)
                si_pm4_inval_zsbuf_cache(pm4);
index fc9aa221959ec6a2ad43827149c13a9b808226a3..30043a7877788be5a6c5f7900b14977a4ede5879 100644 (file)
@@ -83,6 +83,7 @@ struct si_vertex_element
 union si_state {
        struct {
                struct si_pm4_state             *sync;
+               struct si_pm4_state             *flush_and_inv_cb_meta;
                struct si_pm4_state             *init;
                struct si_state_blend           *blend;
                struct si_pm4_state             *blend_color;
@@ -229,5 +230,6 @@ void si_cmd_draw_index_2(struct si_pm4_state *pm4, uint32_t max_size,
 void si_cmd_draw_index_auto(struct si_pm4_state *pm4, uint32_t count,
                            uint32_t initiator, bool predicate);
 void si_cmd_surface_sync(struct si_pm4_state *pm4, uint32_t cp_coher_cntl);
+void si_cmd_flush_and_inv_cb_meta(struct si_pm4_state *pm4);
 
 #endif
index 6be10c84cc901abefc9f292e852438f9b717ada3..c1b1bae5f98649a8d23d08a2a54957de1c1df4a6 100644 (file)
@@ -706,6 +706,17 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                si_pm4_set_state(rctx, sync, pm4);
        }
 
+       if (rctx->flush_and_inv_cb_meta) {
+               struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
+
+               if (pm4 == NULL)
+                       return;
+
+               si_cmd_flush_and_inv_cb_meta(pm4);
+               si_pm4_set_state(rctx, flush_and_inv_cb_meta, pm4);
+               rctx->flush_and_inv_cb_meta = false;
+       }
+
        /* Emit states. */
        rctx->pm4_dirty_cdwords += si_pm4_dirty_dw(rctx);
 
index 86394075cf0af031fc61fea261e7e337aa83ef1d..f20d0513a3829ef8570225d67a59aa5ec6c79f10 100644 (file)
 #define   S_028C74_FMASK_TILE_MODE_INDEX(x)                           (((x) & 0x1F) << 5)
 #define   G_028C74_FMASK_TILE_MODE_INDEX(x)                           (((x) >> 5) & 0x1F)
 #define   C_028C74_FMASK_TILE_MODE_INDEX                              0xFFFFFC1F
+#define   S_028C74_FMASK_BANK_HEIGHT(x)                                      (((x) & 0x3) << 10) /* SI errata */
 #define   S_028C74_NUM_SAMPLES(x)                                     (((x) & 0x07) << 12)
 #define   G_028C74_NUM_SAMPLES(x)                                     (((x) >> 12) & 0x07)
 #define   C_028C74_NUM_SAMPLES                                        0xFFFF8FFF