From 2a4b2e23053db846903199ed1a892fe72da70750 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 2 Aug 2013 01:44:15 +0200 Subject: [PATCH] radeonsi: implement MSAA colorbuffer compression for rendering MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- .../drivers/radeonsi/r600_hw_context.c | 3 + src/gallium/drivers/radeonsi/r600_resource.h | 21 +++ src/gallium/drivers/radeonsi/r600_texture.c | 133 +++++++++++++++++- src/gallium/drivers/radeonsi/radeonsi_pipe.h | 2 + src/gallium/drivers/radeonsi/si_commands.c | 9 ++ src/gallium/drivers/radeonsi/si_state.c | 28 ++++ src/gallium/drivers/radeonsi/si_state.h | 2 + src/gallium/drivers/radeonsi/si_state_draw.c | 11 ++ src/gallium/drivers/radeonsi/sid.h | 1 + 9 files changed, 208 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c index bc6ba0bd1f0..55daa82994e 100644 --- a/src/gallium/drivers/radeonsi/r600_hw_context.c +++ b/src/gallium/drivers/radeonsi/r600_hw_context.c @@ -184,10 +184,13 @@ static void r600_flush_framebuffer(struct r600_context *ctx) S_0085F0_CB7_DEST_BASE_ENA(1) | S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1)); + si_cmd_flush_and_inv_cb_meta(pm4); + si_pm4_emit(ctx, pm4); si_pm4_free_state(ctx, pm4, ~0); ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; + ctx->flush_and_inv_cb_meta = false; } void si_context_flush(struct r600_context *ctx, unsigned flags) diff --git a/src/gallium/drivers/radeonsi/r600_resource.h b/src/gallium/drivers/radeonsi/r600_resource.h index ca8121f32f9..e5dd36a1bab 100644 --- a/src/gallium/drivers/radeonsi/r600_resource.h +++ b/src/gallium/drivers/radeonsi/r600_resource.h @@ -40,6 +40,22 @@ struct r600_transfer { struct pipe_resource *staging; }; +struct r600_fmask_info { + unsigned offset; + unsigned size; + unsigned alignment; + unsigned bank_height; + unsigned slice_tile_max; + unsigned tile_mode_index; +}; + +struct r600_cmask_info { + unsigned offset; + unsigned size; + unsigned alignment; + unsigned slice_tile_max; +}; + struct r600_texture { struct si_resource resource; @@ -48,12 +64,17 @@ struct r600_texture { * for the stencil buffer below. */ enum pipe_format real_format; + unsigned size; unsigned pitch_override; unsigned is_depth; unsigned dirty_level_mask; /* each bit says if that miplevel is dirty */ struct r600_texture *flushed_depth_texture; boolean is_flushing_texture; struct radeon_surface surface; + + /* Colorbuffer compression and fast clear. */ + struct r600_fmask_info fmask; + struct r600_cmask_info cmask; }; struct r600_surface { diff --git a/src/gallium/drivers/radeonsi/r600_texture.c b/src/gallium/drivers/radeonsi/r600_texture.c index 185d987587d..59e36045a8a 100644 --- a/src/gallium/drivers/radeonsi/r600_texture.c +++ b/src/gallium/drivers/radeonsi/r600_texture.c @@ -173,6 +173,9 @@ static int r600_setup_surface(struct pipe_screen *screen, if (r) { return r; } + + rtex->size = rtex->surface.bo_size; + if (pitch_in_bytes_override && pitch_in_bytes_override != rtex->surface.level[0].pitch_bytes) { /* old ddx on evergreen over estimate alignment for 1d, only 1 level * for those @@ -419,6 +422,116 @@ static const struct u_resource_vtbl r600_texture_vtbl = DEBUG_GET_ONCE_BOOL_OPTION(print_texdepth, "RADEON_PRINT_TEXDEPTH", FALSE); +/* The number of samples can be specified independently of the texture. */ +static void r600_texture_get_fmask_info(struct r600_screen *rscreen, + struct r600_texture *rtex, + unsigned nr_samples, + struct r600_fmask_info *out) +{ + /* FMASK is allocated like an ordinary texture. */ + struct radeon_surface fmask = rtex->surface; + + memset(out, 0, sizeof(*out)); + + fmask.bo_alignment = 0; + fmask.bo_size = 0; + fmask.nsamples = 1; + fmask.flags |= RADEON_SURF_FMASK | RADEON_SURF_HAS_TILE_MODE_INDEX; + + switch (nr_samples) { + case 2: + case 4: + fmask.bpe = 1; + break; + case 8: + fmask.bpe = 4; + break; + default: + R600_ERR("Invalid sample count for FMASK allocation.\n"); + return; + } + + if (rscreen->ws->surface_init(rscreen->ws, &fmask)) { + R600_ERR("Got error in surface_init while allocating FMASK.\n"); + return; + } + + assert(fmask.level[0].mode == RADEON_SURF_MODE_2D); + + out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64; + if (out->slice_tile_max) + out->slice_tile_max -= 1; + + out->tile_mode_index = fmask.tiling_index[0]; + out->bank_height = fmask.bankh; + out->alignment = MAX2(256, fmask.bo_alignment); + out->size = fmask.bo_size; +} + +static void r600_texture_allocate_fmask(struct r600_screen *rscreen, + struct r600_texture *rtex) +{ + r600_texture_get_fmask_info(rscreen, rtex, + rtex->resource.b.b.nr_samples, &rtex->fmask); + + rtex->fmask.offset = align(rtex->size, rtex->fmask.alignment); + rtex->size = rtex->fmask.offset + rtex->fmask.size; +} + +static void si_texture_get_cmask_info(struct r600_screen *rscreen, + struct r600_texture *rtex, + struct r600_cmask_info *out) +{ + unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes; + unsigned num_pipes = rscreen->tiling_info.num_channels; + unsigned cl_width, cl_height; + + switch (num_pipes) { + case 2: + cl_width = 32; + cl_height = 16; + break; + case 4: + cl_width = 32; + cl_height = 32; + break; + case 8: + cl_width = 64; + cl_height = 32; + break; + default: + assert(0); + return; + } + + unsigned base_align = num_pipes * pipe_interleave_bytes; + + unsigned width = align(rtex->surface.npix_x, cl_width*8); + unsigned height = align(rtex->surface.npix_y, cl_height*8); + unsigned slice_elements = (width * height) / (8*8); + + /* Each element of CMASK is a nibble. */ + unsigned slice_bytes = slice_elements / 2; + + out->slice_tile_max = (width * height) / (128*128); + if (out->slice_tile_max) + out->slice_tile_max -= 1; + + out->alignment = MAX2(256, base_align); + out->size = rtex->surface.array_size * align(slice_bytes, base_align); +} + +static void r600_texture_allocate_cmask(struct r600_screen *rscreen, + struct r600_texture *rtex) +{ + si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); + + if (rtex->cmask.size) { + rtex->cmask.offset = align(rtex->size, rtex->cmask.alignment); + rtex->size = rtex->cmask.offset + rtex->cmask.size; + } +} + static struct r600_texture * r600_texture_create_object(struct pipe_screen *screen, const struct pipe_resource *base, @@ -456,13 +569,23 @@ r600_texture_create_object(struct pipe_screen *screen, return NULL; } + if (base->nr_samples > 1 && !rtex->is_depth && !buf) { + r600_texture_allocate_fmask(rscreen, rtex); + r600_texture_allocate_cmask(rscreen, rtex); + } + + if (!rtex->is_depth && base->nr_samples > 1 && + (!rtex->fmask.size || !rtex->cmask.size)) { + FREE(rtex); + return NULL; + } + /* Now create the backing buffer. */ if (!buf && alloc_bo) { unsigned base_align = rtex->surface.bo_alignment; - unsigned size = rtex->surface.bo_size; base_align = rtex->surface.bo_alignment; - if (!si_init_resource(rscreen, resource, size, base_align, FALSE, base->usage)) { + if (!si_init_resource(rscreen, resource, rtex->size, base_align, FALSE, base->usage)) { FREE(rtex); return NULL; } @@ -472,6 +595,12 @@ r600_texture_create_object(struct pipe_screen *screen, resource->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM; } + if (rtex->cmask.size) { + /* Initialize the cmask to 0xCC (= compressed state). */ + char *map = rscreen->ws->buffer_map(resource->cs_buf, NULL, PIPE_TRANSFER_WRITE); + memset(map + rtex->cmask.offset, 0xCC, rtex->cmask.size); + } + if (debug_get_option_print_texdepth() && rtex->is_depth) { printf("Texture: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, " "blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, " diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index b909323cdfd..d255837a954 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -217,6 +217,8 @@ struct r600_context { /* SI state handling */ union si_state queued; union si_state emitted; + + bool flush_and_inv_cb_meta; }; /* r600_blit.c */ diff --git a/src/gallium/drivers/radeonsi/si_commands.c b/src/gallium/drivers/radeonsi/si_commands.c index bf9592493c3..e498bd2d12e 100644 --- a/src/gallium/drivers/radeonsi/si_commands.c +++ b/src/gallium/drivers/radeonsi/si_commands.c @@ -78,3 +78,12 @@ void si_cmd_surface_sync(struct si_pm4_state *pm4, uint32_t cp_coher_cntl) si_pm4_cmd_end(pm4, false); } } + +void si_cmd_flush_and_inv_cb_meta(struct si_pm4_state *pm4) +{ + si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE); + si_pm4_cmd_add(pm4, + EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | + EVENT_INDEX(0)); + si_pm4_cmd_end(pm4, false); +} diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ff4a4b1dba4..496d3a4941d 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1852,8 +1852,22 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4, if (rtex->resource.b.b.nr_samples > 1) { unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); + color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS(log_samples); + + if (rtex->fmask.size) { + color_info |= S_028C70_COMPRESSION(1); + unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height); + + /* due to a bug in the hw, FMASK_BANK_HEIGHT must be set on SI too */ + color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index) | + S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); + } + } + + if (rtex->cmask.size) { + color_info |= S_028C70_FAST_CLEAR(1); } offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture); @@ -1875,6 +1889,19 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4, si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info); si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, color_attrib); + if (rtex->cmask.size) { + si_pm4_set_reg(pm4, R_028C7C_CB_COLOR0_CMASK + cb * 0x3C, + offset + (rtex->cmask.offset >> 8)); + si_pm4_set_reg(pm4, R_028C80_CB_COLOR0_CMASK_SLICE + cb * 0x3C, + S_028C80_TILE_MAX(rtex->cmask.slice_tile_max)); + } + if (rtex->fmask.size) { + si_pm4_set_reg(pm4, R_028C84_CB_COLOR0_FMASK + cb * 0x3C, + offset + (rtex->fmask.offset >> 8)); + si_pm4_set_reg(pm4, R_028C88_CB_COLOR0_FMASK_SLICE + cb * 0x3C, + S_028C88_TILE_MAX(rtex->fmask.slice_tile_max)); + } + /* set CB_COLOR1_INFO for possible dual-src blending */ if (state->nr_cbufs == 1) { assert(cb == 0); @@ -2210,6 +2237,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, return; si_pm4_inval_fb_cache(pm4, state->nr_cbufs); + rctx->flush_and_inv_cb_meta = true; if (state->zsbuf) si_pm4_inval_zsbuf_cache(pm4); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index fc9aa221959..30043a78777 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -83,6 +83,7 @@ struct si_vertex_element union si_state { struct { struct si_pm4_state *sync; + struct si_pm4_state *flush_and_inv_cb_meta; struct si_pm4_state *init; struct si_state_blend *blend; struct si_pm4_state *blend_color; @@ -229,5 +230,6 @@ void si_cmd_draw_index_2(struct si_pm4_state *pm4, uint32_t max_size, void si_cmd_draw_index_auto(struct si_pm4_state *pm4, uint32_t count, uint32_t initiator, bool predicate); void si_cmd_surface_sync(struct si_pm4_state *pm4, uint32_t cp_coher_cntl); +void si_cmd_flush_and_inv_cb_meta(struct si_pm4_state *pm4); #endif diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 6be10c84cc9..c1b1bae5f98 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -706,6 +706,17 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) si_pm4_set_state(rctx, sync, pm4); } + if (rctx->flush_and_inv_cb_meta) { + struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); + + if (pm4 == NULL) + return; + + si_cmd_flush_and_inv_cb_meta(pm4); + si_pm4_set_state(rctx, flush_and_inv_cb_meta, pm4); + rctx->flush_and_inv_cb_meta = false; + } + /* Emit states. */ rctx->pm4_dirty_cdwords += si_pm4_dirty_dw(rctx); diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index 86394075cf0..f20d0513a38 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -8521,6 +8521,7 @@ #define S_028C74_FMASK_TILE_MODE_INDEX(x) (((x) & 0x1F) << 5) #define G_028C74_FMASK_TILE_MODE_INDEX(x) (((x) >> 5) & 0x1F) #define C_028C74_FMASK_TILE_MODE_INDEX 0xFFFFFC1F +#define S_028C74_FMASK_BANK_HEIGHT(x) (((x) & 0x3) << 10) /* SI errata */ #define S_028C74_NUM_SAMPLES(x) (((x) & 0x07) << 12) #define G_028C74_NUM_SAMPLES(x) (((x) >> 12) & 0x07) #define C_028C74_NUM_SAMPLES 0xFFFF8FFF -- 2.30.2