From 96ed6c90eff58ce030c39c2b4db6daf512586b34 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 12 Oct 2012 18:46:32 +0200 Subject: [PATCH] r600g: implement texturing with 8x MSAA compressed surfaces for Evergreen The 2x and 4x MSAA cases are completely broken. The lfdptr instruction returns garbage there. The 8x MSAA case is broken on Cayman, though at least the result looks somewhat correct. Only the 8x MSAA case works on Evergreen and is enabled. --- src/gallium/auxiliary/util/u_blitter.c | 8 ++ src/gallium/auxiliary/util/u_blitter.h | 6 + src/gallium/drivers/r600/evergreen_state.c | 39 ++++-- src/gallium/drivers/r600/evergreend.h | 2 +- src/gallium/drivers/r600/r600_asm.c | 10 +- src/gallium/drivers/r600/r600_asm.h | 7 +- src/gallium/drivers/r600/r600_blit.c | 42 +++--- src/gallium/drivers/r600/r600_pipe.c | 29 +++- src/gallium/drivers/r600/r600_pipe.h | 21 +++ src/gallium/drivers/r600/r600_shader.c | 133 ++++++++++++++++++- src/gallium/drivers/r600/r600_sq.h | 3 + src/gallium/drivers/r600/r600_state.c | 3 +- src/gallium/drivers/r600/r600_state_common.c | 4 +- 13 files changed, 270 insertions(+), 37 deletions(-) diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 4d6cdd7a244..f4ac4aa8685 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -359,6 +359,14 @@ void util_blitter_destroy(struct blitter_context *blitter) FREE(ctx); } +void util_blitter_set_texture_multisample(struct blitter_context *blitter, + boolean supported) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + + ctx->has_texture_multisample = supported; +} + static void blitter_set_running_flag(struct blitter_context_priv *ctx) { if (ctx->base.running) { diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index de063937793..c49faaad717 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -135,6 +135,12 @@ struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter) return blitter->pipe; } +/** + * Override PIPE_CAP_TEXTURE_MULTISAMPLE as reported by the driver. + */ +void util_blitter_set_texture_multisample(struct blitter_context *blitter, + boolean supported); + /* The default function to draw a rectangle. This can only be used * inside of the draw_rectangle callback if the driver overrides it. */ void util_blitter_draw_rectangle(struct blitter_context *blitter, diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 96e246a6e68..17b7e9d2c72 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -633,7 +633,7 @@ boolean evergreen_is_format_supported(struct pipe_screen *screen, return FALSE; if (sample_count > 1) { - if (rscreen->info.drm_minor < 19) + if (!rscreen->has_msaa) return FALSE; switch (sample_count) { @@ -1074,11 +1074,24 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, S_030004_TEX_DEPTH(depth - 1) | S_030004_ARRAY_MODE(array_mode)); view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8; - if (state->u.tex.last_level && texture->nr_samples <= 1) { + + /* TEX_RESOURCE_WORD3.MIP_ADDRESS */ + if (texture->nr_samples > 1 && rscreen->msaa_texture_support == MSAA_TEXTURE_COMPRESSED) { + /* XXX the 2x and 4x cases are broken. */ + if (tmp->is_depth || tmp->resource.b.b.nr_samples != 8) { + /* disable FMASK (0 = disabled) */ + view->tex_resource_words[3] = 0; + view->skip_mip_address_reloc = true; + } else { + /* FMASK should be in MIP_ADDRESS for multisample textures */ + view->tex_resource_words[3] = (tmp->fmask_offset + r600_resource_va(ctx->screen, texture)) >> 8; + } + } else if (state->u.tex.last_level && texture->nr_samples <= 1) { view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8; } else { view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8; } + view->tex_resource_words[4] = (word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_030010_ENDIAN_SWAP(endian)); @@ -1582,9 +1595,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, rctx->framebuffer.export_16bpc = false; } - /* Cayman can fetch from a compressed MSAA colorbuffer, - * so it's pointless to track them. */ - if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) { + if (rtex->fmask_size && rtex->cmask_size) { rctx->framebuffer.compressed_cb_mask |= 1 << i; } } @@ -2258,13 +2269,15 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, r600_write_value(cs, (resource_id_base + resource_index) * 8); r600_write_array(cs, 8, rview->tex_resource_words); - /* XXX The kernel needs two relocations. This is stupid. */ reloc = r600_context_bo_reloc(rctx, rview->tex_resource, RADEON_USAGE_READ); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); r600_write_value(cs, reloc); - r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); - r600_write_value(cs, reloc); + + if (!rview->skip_mip_address_reloc) { + r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); + r600_write_value(cs, reloc); + } } state->dirty_mask = 0; } @@ -3345,6 +3358,16 @@ void *evergreen_create_decompress_blend(struct r600_context *rctx) return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_DECOMPRESS); } +void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx) +{ + struct pipe_blend_state blend; + + memset(&blend, 0, sizeof(blend)); + blend.independent_blend_enable = true; + blend.rt[0].colormask = 0xf; + return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_FMASK_DECOMPRESS); +} + void *evergreen_create_db_flush_dsa(struct r600_context *rctx) { struct pipe_depth_stencil_alpha_state dsa = {{0}}; diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 98df83de918..edb1a55dc8a 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -486,7 +486,7 @@ #define V_028808_CB_ELIMINATE_FAST_CLEAR 0x00000002 #define V_028808_CB_RESOLVE 0x00000003 #define V_028808_CB_DECOMPRESS 0x00000004 -#define V_028808_CB_FASK_DECOMPRESS 0x00000005 +#define V_028808_CB_FMASK_DECOMPRESS 0x00000005 #define S_028808_ROP3(x) (((x) & 0xFF) << 16) #define G_028808_ROP3(x) (((x) >> 16) & 0xFF) #define C_028808_ROP3 0xFF00FFFF diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 51a2e4ee9e5..f04a92062f6 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -255,7 +255,10 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void) return tex; } -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family) +void r600_bytecode_init(struct r600_bytecode *bc, + enum chip_class chip_class, + enum radeon_family family, + enum r600_msaa_texture_mode msaa_texture_mode) { if ((chip_class == R600) && (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) { @@ -268,6 +271,7 @@ void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, en LIST_INITHEAD(&bc->cf); bc->chip_class = chip_class; + bc->msaa_texture_mode = msaa_texture_mode; } static int r600_bytecode_add_cf(struct r600_bytecode *bc) @@ -1736,6 +1740,7 @@ static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecod static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id) { bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | + EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) | S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) | S_SQ_TEX_WORD0_SRC_REL(tex->src_rel); @@ -2766,7 +2771,8 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, assert(count < 32); memset(&bc, 0, sizeof(bc)); - r600_bytecode_init(&bc, rctx->chip_class, rctx->family); + r600_bytecode_init(&bc, rctx->chip_class, rctx->family, + rctx->screen->msaa_texture_support); for (i = 0; i < count; i++) { if (elements[i].instance_divisor > 1) { diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 8a9f3189be0..2c7db2cefd7 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -62,6 +62,7 @@ struct r600_bytecode_alu { struct r600_bytecode_tex { struct list_head list; unsigned inst; + unsigned inst_mod; unsigned resource_id; unsigned src_gpr; unsigned src_rel; @@ -195,6 +196,7 @@ struct r600_cf_callstack { struct r600_bytecode { enum chip_class chip_class; + enum r600_msaa_texture_mode msaa_texture_mode; int type; struct list_head cf; struct r600_bytecode_cf *cf_last; @@ -219,7 +221,10 @@ struct r600_bytecode { int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf); /* r600_asm.c */ -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family); +void r600_bytecode_init(struct r600_bytecode *bc, + enum chip_class chip_class, + enum radeon_family family, + enum r600_msaa_texture_mode msaa_texture_mode); void r600_bytecode_clear(struct r600_bytecode *bc); int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu); int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 8597b8dfcf7..a19248da3a2 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -252,12 +252,29 @@ static void r600_blit_decompress_color(struct pipe_context *ctx, { struct r600_context *rctx = (struct r600_context *)ctx; unsigned layer, level, checked_last_layer, max_layer; - - assert(rctx->chip_class != CAYMAN); + void *blend_decompress; if (!rtex->dirty_level_mask) return; + switch (rctx->screen->msaa_texture_support) { + case MSAA_TEXTURE_DECOMPRESSED: + blend_decompress = rctx->custom_blend_decompress; + break; + case MSAA_TEXTURE_COMPRESSED: + /* XXX the 2x and 4x cases are broken. */ + if (rtex->resource.b.b.nr_samples == 8) + blend_decompress = rctx->custom_blend_fmask_decompress; + else + blend_decompress = rctx->custom_blend_decompress; + break; + case MSAA_TEXTURE_SAMPLE_ZERO: + default: + /* Nothing to do. */ + rtex->dirty_level_mask = 0; + return; + } + for (level = first_level; level <= last_level; level++) { if (!(rtex->dirty_level_mask & (1 << level))) continue; @@ -278,8 +295,7 @@ static void r600_blit_decompress_color(struct pipe_context *ctx, cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl); r600_blitter_begin(ctx, R600_DECOMPRESS); - util_blitter_custom_color(rctx->blitter, cbsurf, - rctx->custom_blend_decompress); + util_blitter_custom_color(rctx->blitter, cbsurf, blend_decompress); r600_blitter_end(ctx); pipe_surface_reference(&cbsurf, NULL); @@ -299,13 +315,6 @@ void r600_decompress_color_textures(struct r600_context *rctx, unsigned i; unsigned mask = textures->compressed_colortex_mask; - /* Cayman cannot decompress an MSAA colorbuffer, - * but it can read it compressed, so skip this. */ - assert(rctx->chip_class != CAYMAN); - if (rctx->chip_class == CAYMAN) { - return; - } - while (mask) { struct pipe_sampler_view *view; struct r600_texture *tex; @@ -333,7 +342,6 @@ static bool r600_decompress_subresource(struct pipe_context *ctx, unsigned level, unsigned first_layer, unsigned last_layer) { - struct r600_context *rctx = (struct r600_context *)ctx; struct r600_texture *rtex = (struct r600_texture*)tex; if (rtex->is_depth && !rtex->is_flushing_texture) { @@ -344,7 +352,7 @@ static bool r600_decompress_subresource(struct pipe_context *ctx, level, level, first_layer, last_layer, 0, u_max_sample(tex)); - } else if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) { + } else if (rtex->fmask_size && rtex->cmask_size) { r600_blit_decompress_color(ctx, rtex, level, level, first_layer, last_layer); } @@ -459,6 +467,7 @@ static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_sampler_view src_templ, *src_view; unsigned dst_width, dst_height, src_width0, src_height0, src_widthFL, src_heightFL; struct pipe_box sbox; + bool copy_all_samples; /* Handle buffers first. */ if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { @@ -558,16 +567,15 @@ static void r600_resource_copy_region(struct pipe_context *ctx, src_widthFL, src_heightFL); } + copy_all_samples = rctx->screen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO; + /* Copy. */ - /* XXX Multisample texturing is unimplemented on Cayman. In the meantime, - * copy only the first sample (which is the only one that is uncompressed - * and therefore doesn't return garbage). */ r600_blitter_begin(ctx, R600_COPY_TEXTURE); util_blitter_blit_generic(rctx->blitter, dst_view, dstx, dsty, abs(src_box->width), abs(src_box->height), src_view, src_box, src_width0, src_height0, PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, - rctx->chip_class != CAYMAN); + copy_all_samples); r600_blitter_end(ctx); pipe_surface_reference(&dst_view, NULL); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 916fa381a33..7a1e1353553 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -171,6 +171,9 @@ static void r600_destroy_context(struct pipe_context *context) if (rctx->custom_blend_decompress) { rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_decompress); } + if (rctx->custom_blend_fmask_decompress) { + rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_fmask_decompress); + } util_unreference_framebuffer_state(&rctx->framebuffer.state); r600_context_fini(rctx); @@ -264,6 +267,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx); rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx); + rctx->custom_blend_fmask_decompress = evergreen_create_fmask_decompress_blend(rctx); rctx->has_vertex_cache = !(rctx->family == CHIP_CEDAR || rctx->family == CHIP_PALM || rctx->family == CHIP_SUMO || @@ -289,6 +293,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) goto fail; + util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa); rctx->blitter->draw_rectangle = r600_draw_rectangle; r600_begin_new_cs(rctx); @@ -393,7 +398,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_COMPUTE: case PIPE_CAP_START_INSTANCE: case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: - case PIPE_CAP_TEXTURE_MULTISAMPLE: return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: @@ -402,6 +406,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_GLSL_FEATURE_LEVEL: return 130; + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO; + /* Supported except the original R600. */ case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: @@ -947,6 +954,26 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) break; } + /* MSAA support. */ + switch (rscreen->chip_class) { + case R600: + case R700: + rscreen->has_msaa = rscreen->info.drm_minor >= 22; + rscreen->msaa_texture_support = MSAA_TEXTURE_DECOMPRESSED; + break; + case EVERGREEN: + rscreen->has_msaa = rscreen->info.drm_minor >= 19; + rscreen->msaa_texture_support = + rscreen->info.drm_minor >= 24 ? MSAA_TEXTURE_COMPRESSED : + MSAA_TEXTURE_DECOMPRESSED; + break; + case CAYMAN: + rscreen->has_msaa = rscreen->info.drm_minor >= 19; + /* We should be able to read compressed MSAA textures, but it doesn't work. */ + rscreen->msaa_texture_support = MSAA_TEXTURE_SAMPLE_ZERO; + break; + } + if (r600_init_tiling(rscreen)) { FREE(rscreen); return NULL; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 17dab7f23d5..238ab1676f4 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -184,6 +184,22 @@ struct r600_pipe_fences { pipe_mutex mutex; }; +enum r600_msaa_texture_mode { + /* If the hw can fetch the first sample only (no decompression available). + * This means MSAA texturing is not fully implemented. */ + MSAA_TEXTURE_SAMPLE_ZERO, + + /* If the hw can fetch decompressed MSAA textures. + * Supported families: R600, R700, Evergreen. + * Cayman cannot use this, because it cannot do the decompression. */ + MSAA_TEXTURE_DECOMPRESSED, + + /* If the hw can fetch compressed MSAA textures, which means shaders can + * read resolved FMASK. This yields the best performance. + * Supported families: Evergreen, Cayman. */ + MSAA_TEXTURE_COMPRESSED +}; + struct r600_screen { struct pipe_screen screen; struct radeon_winsys *ws; @@ -191,6 +207,8 @@ struct r600_screen { enum chip_class chip_class; struct radeon_info info; bool has_streamout; + bool has_msaa; + enum r600_msaa_texture_mode msaa_texture_support; struct r600_tiling_info tiling_info; struct r600_pipe_fences fences; @@ -205,6 +223,7 @@ struct r600_pipe_sampler_view { struct pipe_sampler_view base; struct r600_resource *tex_resource; uint32_t tex_resource_words[8]; + bool skip_mip_address_reloc; }; struct r600_rasterizer_state { @@ -372,6 +391,7 @@ struct r600_context { void *custom_dsa_flush; void *custom_blend_resolve; void *custom_blend_decompress; + void *custom_blend_fmask_decompress; /* With rasterizer discard, there doesn't have to be a pixel shader. * In that case, we bind this one: */ void *dummy_pixel_shader; @@ -525,6 +545,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader void *evergreen_create_db_flush_dsa(struct r600_context *rctx); void *evergreen_create_resolve_blend(struct r600_context *rctx); void *evergreen_create_decompress_blend(struct r600_context *rctx); +void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx); boolean evergreen_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c56efda5347..0b586f3aedb 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1180,7 +1180,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, ctx.shader = shader; ctx.native_integers = true; - r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family); + r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family, + rscreen->msaa_texture_support); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); @@ -3796,10 +3797,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) unsigned src_gpr; int r, i, j; int opcode; + bool read_compressed_msaa = ctx->bc->msaa_texture_mode == MSAA_TEXTURE_COMPRESSED && + inst->Instruction.Opcode == TGSI_OPCODE_TXF && + (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || + inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); /* Texture fetch instructions can only use gprs as source. * Also they cannot negate the source or take the absolute value */ - const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && - tgsi_tex_src_requires_loading(ctx, 0); + const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && + tgsi_tex_src_requires_loading(ctx, 0)) || + read_compressed_msaa; boolean src_loaded = FALSE; unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; uint8_t offset_x = 0, offset_y = 0, offset_z = 0; @@ -4070,6 +4076,127 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src_gpr = ctx->temp_reg; } + /* Obtain the sample index for reading a compressed MSAA color texture. + * To read the FMASK, we use the ldfptr instruction, which tells us + * where the samples are stored. + * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, + * which is the identity mapping. Each nibble says which physical sample + * should be fetched to get that sample. + * + * Assume src.z contains the sample index. It should be modified like this: + * src.z = (ldfptr() >> (src.z * 4)) & 0xF; + * Then fetch the texel with src. + */ + if (read_compressed_msaa) { + unsigned sample_chan = inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ? 3 : 4; + unsigned temp = r600_get_temp(ctx); + assert(src_loaded); + + /* temp.w = ldfptr() */ + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); + tex.inst = SQ_TEX_INST_LD; + tex.inst_mod = 1; /* to indicate this is ldfptr */ + tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; + tex.src_gpr = src_gpr; + tex.dst_gpr = temp; + tex.dst_sel_x = 7; /* mask out these components */ + tex.dst_sel_y = 7; + tex.dst_sel_z = 7; + tex.dst_sel_w = 0; /* store X */ + tex.src_sel_x = 0; + tex.src_sel_y = 1; + tex.src_sel_z = 2; + tex.src_sel_w = 3; + tex.offset_x = offset_x; + tex.offset_y = offset_y; + tex.offset_z = offset_z; + r = r600_bytecode_add_tex(ctx->bc, &tex); + if (r) + return r; + + /* temp.x = sample_index*4 */ + if (ctx->bc->chip_class == CAYMAN) { + for (i = 0 ; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = ctx->inst_info->r600_opcode; + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 4; + alu.dst.sel = temp; + alu.dst.chan = i; + alu.dst.write = i == 0; + if (i == 3) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT); + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 4; + alu.dst.sel = temp; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + /* sample_index = temp.w >> temp.x */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT); + alu.src[0].sel = temp; + alu.src[0].chan = 3; + alu.src[1].sel = temp; + alu.src[1].chan = 0; + alu.dst.sel = src_gpr; + alu.dst.chan = sample_chan; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* sample_index & 0xF */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT); + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 0xF; + alu.dst.sel = src_gpr; + alu.dst.chan = sample_chan; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; +#if 0 + /* visualize the FMASK */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; + alu.dst.chan = i; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +#endif + } + opcode = ctx->inst_info->r600_opcode; if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 4b2a19a07f7..587f88deb9e 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -375,6 +375,9 @@ #define S_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) & 0x1) << 5) #define G_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) >> 5) & 0x1) #define C_SQ_TEX_WORD0_BC_FRAC_MODE 0xFFFFFFDF +#define EG_S_SQ_TEX_WORD0_INST_MOD(x) (((x) & 0x3) << 5) +#define EG_G_SQ_TEX_WORD0_INST_MOD(x) (((x) >> 5) & 0x3) +#define EG_C_SQ_TEX_WORD0_INST_MOD 0xFFFFFF9F #define S_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7) #define G_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1) #define C_SQ_TEX_WORD0_FETCH_WHOLE_QUAD 0xFFFFFF7F diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7d07008f16d..1a8d55e8d36 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -585,7 +585,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen, return FALSE; if (sample_count > 1) { - if (rscreen->info.drm_minor < 22) + if (!rscreen->has_msaa) return FALSE; /* R11G11B10 is broken on R6xx. */ @@ -1988,7 +1988,6 @@ static void r600_emit_sampler_views(struct r600_context *rctx, r600_write_value(cs, (resource_id_base + resource_index) * 7); r600_write_array(cs, 7, rview->tex_resource_words); - /* XXX The kernel needs two relocations. This is stupid. */ reloc = r600_context_bo_reloc(rctx, rview->tex_resource, RADEON_USAGE_READ); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 65985c7653d..a4d3e461ef1 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -593,8 +593,8 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader, dst->views.compressed_depthtex_mask &= ~(1 << i); } - /* Track compressed colorbuffers for Evergreen (Cayman doesn't need this). */ - if (rctx->chip_class != CAYMAN && rtex->cmask_size && rtex->fmask_size) { + /* Track compressed colorbuffers. */ + if (rtex->cmask_size && rtex->fmask_size) { dst->views.compressed_colortex_mask |= 1 << i; } else { dst->views.compressed_colortex_mask &= ~(1 << i); -- 2.30.2