From 5a609fbcb5459fc5cac2e0361a405ea4b884325f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 7 Dec 2013 17:32:22 +0100 Subject: [PATCH] gallium/u_blitter: implement shader-based MSAA resolve with bilinear filtering For scaled resolve. The filter is only good for magnification. If somebody has an idea how to implement a good filter for minification, I'm all ears. I'd have to use derivatives probably. Reviewed-by: Brian Paul --- src/gallium/auxiliary/util/u_blitter.c | 79 +++++++++------ src/gallium/auxiliary/util/u_simple_shaders.c | 95 +++++++++++++++++++ src/gallium/auxiliary/util/u_simple_shaders.h | 6 ++ 3 files changed, 149 insertions(+), 31 deletions(-) diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index e3faf6c0287..9246bd722ca 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -95,9 +95,9 @@ struct blitter_context_priv void *fs_texfetch_stencil_msaa[PIPE_MAX_TEXTURE_TYPES]; /* FS which outputs an average of all samples. */ - void *fs_resolve[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS]; - void *fs_resolve_sint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS]; - void *fs_resolve_uint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS]; + void *fs_resolve[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2]; /* last index is the filter value given to blitter_get_fs_texfetch_col */ + void *fs_resolve_sint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2]; + void *fs_resolve_uint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2]; /* Blend state. 
*/ void *blend[PIPE_MASK_RGBA+1]; /**< blend state with writemask */ @@ -342,7 +342,7 @@ void util_blitter_destroy(struct blitter_context *blitter) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = blitter->pipe; - int i, j; + int i, j, f; for (i = 0; i <= PIPE_MASK_RGBA; i++) { pipe->delete_blend_state(pipe, ctx->blend[i]); @@ -382,16 +382,19 @@ void util_blitter_destroy(struct blitter_context *blitter) ctx->delete_fs_state(pipe, ctx->fs_texfetch_stencil[i]); for (j = 0; j< Elements(ctx->fs_resolve[i]); j++) - if (ctx->fs_resolve[i][j]) - ctx->delete_fs_state(pipe, ctx->fs_resolve[i][j]); + for (f = 0; f < 2; f++) /* f walks the two filter slots of each cached resolve shader */ + if (ctx->fs_resolve[i][j][f]) + ctx->delete_fs_state(pipe, ctx->fs_resolve[i][j][f]); for (j = 0; j< Elements(ctx->fs_resolve_sint[i]); j++) - if (ctx->fs_resolve_sint[i][j]) - ctx->delete_fs_state(pipe, ctx->fs_resolve_sint[i][j]); + for (f = 0; f < 2; f++) + if (ctx->fs_resolve_sint[i][j][f]) + ctx->delete_fs_state(pipe, ctx->fs_resolve_sint[i][j][f]); for (j = 0; j< Elements(ctx->fs_resolve_uint[i]); j++) - if (ctx->fs_resolve_uint[i][j]) - ctx->delete_fs_state(pipe, ctx->fs_resolve_uint[i][j]); + for (f = 0; f < 2; f++) + if (ctx->fs_resolve_uint[i][j][f]) + ctx->delete_fs_state(pipe, ctx->fs_resolve_uint[i][j][f]); } ctx->delete_fs_state(pipe, ctx->fs_empty); @@ -750,7 +753,8 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx, enum pipe_format format, enum pipe_texture_target target, unsigned src_nr_samples, - unsigned dst_nr_samples) + unsigned dst_nr_samples, + unsigned filter) { struct pipe_context *pipe = ctx->base.pipe; unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target, src_nr_samples); @@ -768,17 +772,26 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx, is_uint = util_format_is_pure_uint(format); is_sint = util_format_is_pure_sint(format); + assert(filter < 2); /* filter is used below as the index into the [2] dimension of the resolve shader caches */ + if (is_uint) - shader = &ctx->fs_resolve_uint[target][index]; + shader 
= &ctx->fs_resolve_uint[target][index][filter]; else if (is_sint) - shader = &ctx->fs_resolve_sint[target][index]; + shader = &ctx->fs_resolve_sint[target][index][filter]; else - shader = &ctx->fs_resolve[target][index]; + shader = &ctx->fs_resolve[target][index][filter]; if (!*shader) { - *shader = util_make_fs_msaa_resolve(pipe, tgsi_tex, - src_nr_samples, - is_uint, is_sint); + if (filter == PIPE_TEX_FILTER_LINEAR) { /* a linear request builds the bilinear resolve shader, any other filter value the plain resolve shader */ + *shader = util_make_fs_msaa_resolve_bilinear(pipe, tgsi_tex, + src_nr_samples, + is_uint, is_sint); + } + else { + *shader = util_make_fs_msaa_resolve(pipe, tgsi_tex, + src_nr_samples, + is_uint, is_sint); + } } } else { @@ -925,7 +938,7 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_screen *screen = blitter->pipe->screen; - unsigned samples, j, target, max_samples; + unsigned samples, j, f, target, max_samples; boolean has_arraytex, has_cubearraytex; max_samples = ctx->has_texture_multisample ? 2 : 1; @@ -955,7 +968,7 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter) * they read one sample. 
*/ blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_FLOAT, target, - samples, samples); + samples, samples, 0); /* filter argument 0; presumably PIPE_TEX_FILTER_NEAREST - TODO confirm */ blitter_get_fs_texfetch_depth(ctx, target, samples); if (ctx->has_stencil_export) { blitter_get_fs_texfetch_depthstencil(ctx, target, samples); @@ -973,12 +986,14 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter) continue; } - blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_FLOAT, target, - j, 1); - blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_UINT, target, - j, 1); - blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_SINT, target, - j, 1); + for (f = 0; f < 2; f++) { /* request the float, uint and sint resolve shaders once per filter value */ + blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_FLOAT, target, + j, 1, f); + blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_UINT, target, + j, 1, f); + blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_SINT, target, + j, 1, f); + } } } } @@ -1362,6 +1377,12 @@ void util_blitter_blit_generic(struct blitter_context *blitter, return; } + if (blit_stencil || + (dstbox->width == abs(srcbox->width) && + dstbox->height == abs(srcbox->height))) { + filter = PIPE_TEX_FILTER_NEAREST; /* stencil blits, and blits whose dst box size equals the absolute src box size, are forced to nearest */ + } + /* Check whether the states are properly saved. */ blitter_set_running_flag(ctx); blitter_check_saved_vertex_states(ctx); @@ -1405,15 +1426,11 @@ void util_blitter_blit_generic(struct blitter_context *blitter, pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); ctx->bind_fs_state(pipe, blitter_get_fs_texfetch_col(ctx, src->format, src_target, - src_samples, dst_samples)); + src_samples, dst_samples, filter)); } /* Set the linear filter only for scaled color non-MSAA blits. 
*/ - if (filter == PIPE_TEX_FILTER_LINEAR && - !blit_depth && !blit_stencil && - src_samples <= 1 && - (dstbox->width != abs(srcbox->width) || - dstbox->height != abs(srcbox->height))) { + if (filter == PIPE_TEX_FILTER_LINEAR) { /* NOTE(review): the removed condition also required !blit_depth and src_samples <= 1, but only the stencil and unscaled cases are forced to nearest earlier in this function; confirm that scaled depth-only blits and MSAA-source blits are meant to bind the linear sampler, and that the comment above still describes the behaviour */ if (src_target == PIPE_TEXTURE_RECT) { sampler_state = ctx->sampler_state_rect_linear; } else { diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 5be3a13e8e3..82f23ebec25 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -612,3 +612,98 @@ util_make_fs_msaa_resolve(struct pipe_context *pipe, return ureg_create_shader_and_destroy(ureg, pipe); } + +/** Create a fragment shader that resolves a multisampled texture with bilinear filtering: for each of the 4 texels around the input coordinate it sums nr_samples TXF fetches and scales the sum by 1.0 / nr_samples, then blends the 4 averages using the fractional part of the coordinate. With is_uint or is_sint the fetched values are converted to float for the math and converted back before being written to the color output. Returns NULL if ureg_create fails, otherwise the result of ureg_create_shader_and_destroy. */ +void * +util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe, + unsigned tgsi_tex, unsigned nr_samples, + boolean is_uint, boolean is_sint) +{ + struct ureg_program *ureg; + struct ureg_src sampler, coord; + struct ureg_dst out, tmp, top, bottom; + struct ureg_dst tmp_coord[4], tmp_sum[4]; + int i, c; /* NOTE(review): i is signed but is compared with the unsigned nr_samples in the sample loop */ + + ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!ureg) + return NULL; + + /* Declarations. */ + sampler = ureg_DECL_sampler(ureg, 0); + coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0, + TGSI_INTERPOLATE_LINEAR); + out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); + for (c = 0; c < 4; c++) + tmp_sum[c] = ureg_DECL_temporary(ureg); + for (c = 0; c < 4; c++) + tmp_coord[c] = ureg_DECL_temporary(ureg); + tmp = ureg_DECL_temporary(ureg); + top = ureg_DECL_temporary(ureg); + bottom = ureg_DECL_temporary(ureg); + + /* Instructions. */ + for (c = 0; c < 4; c++) + ureg_MOV(ureg, tmp_sum[c], ureg_imm1f(ureg, 0)); + + /* Get 4 texture coordinates for the bilinear filter. NOTE(review): the texel indices come from F2U(coord) and the weights from FRC(coord) with no half-texel offset, and the +1 neighbours are not clamped in this shader - confirm that the coordinates supplied to it make this correct at texel centers and at the right and bottom edges. 
*/ + ureg_F2U(ureg, tmp_coord[0], coord); /* top-left */ + ureg_UADD(ureg, tmp_coord[1], ureg_src(tmp_coord[0]), + ureg_imm4u(ureg, 1, 0, 0, 0)); /* top-right */ + ureg_UADD(ureg, tmp_coord[2], ureg_src(tmp_coord[0]), + ureg_imm4u(ureg, 0, 1, 0, 0)); /* bottom-left */ + ureg_UADD(ureg, tmp_coord[3], ureg_src(tmp_coord[0]), + ureg_imm4u(ureg, 1, 1, 0, 0)); /* bottom-right */ + + for (i = 0; i < nr_samples; i++) { + for (c = 0; c < 4; c++) { + /* Read one sample. */ + ureg_MOV(ureg, ureg_writemask(tmp_coord[c], TGSI_WRITEMASK_W), + ureg_imm1u(ureg, i)); /* the sample index i is written to the W component of the fetch coordinate */ + ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tmp_coord[c]), sampler); + + if (is_uint) + ureg_U2F(ureg, tmp, ureg_src(tmp)); + else if (is_sint) + ureg_I2F(ureg, tmp, ureg_src(tmp)); + + /* Add it to the sum.*/ + ureg_ADD(ureg, tmp_sum[c], ureg_src(tmp_sum[c]), ureg_src(tmp)); + } + } + + /* Calculate the average. */ + for (c = 0; c < 4; c++) + ureg_MUL(ureg, tmp_sum[c], ureg_src(tmp_sum[c]), + ureg_imm1f(ureg, 1.0 / nr_samples)); + + /* Take the 4 average values and apply a standard bilinear filter. */ + ureg_FRC(ureg, tmp, coord); + + ureg_LRP(ureg, top, + ureg_scalar(ureg_src(tmp), 0), + ureg_src(tmp_sum[1]), + ureg_src(tmp_sum[0])); /* mix the top-left and top-right averages, weighted by component 0 of the fractional coordinate */ + + ureg_LRP(ureg, bottom, + ureg_scalar(ureg_src(tmp), 0), + ureg_src(tmp_sum[3]), + ureg_src(tmp_sum[2])); /* same weight, applied to the bottom-left and bottom-right averages */ + + ureg_LRP(ureg, tmp, + ureg_scalar(ureg_src(tmp), 1), + ureg_src(bottom), + ureg_src(top)); /* mix the two rows, weighted by component 1; tmp is overwritten with the filtered result */ + + /* Convert to the texture format and return. 
*/ + if (is_uint) + ureg_F2U(ureg, out, ureg_src(tmp)); + else if (is_sint) + ureg_F2I(ureg, out, ureg_src(tmp)); + else + ureg_MOV(ureg, out, ureg_src(tmp)); + + ureg_END(ureg); + + return ureg_create_shader_and_destroy(ureg, pipe); +} diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h index ea9208c2c7a..e81d99414eb 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -130,6 +130,12 @@ util_make_fs_msaa_resolve(struct pipe_context *pipe, unsigned tgsi_tex, unsigned nr_samples, boolean is_uint, boolean is_sint); + +void * +util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe, + unsigned tgsi_tex, unsigned nr_samples, + boolean is_uint, boolean is_sint); /* bilinear-filtering variant of util_make_fs_msaa_resolve with the same parameter list */ + #ifdef __cplusplus } #endif -- 2.30.2