gallium/u_blitter: implement shader-based MSAA resolve with bilinear filtering
authorMarek Olšák <marek.olsak@amd.com>
Sat, 7 Dec 2013 16:32:22 +0000 (17:32 +0100)
committerMarek Olšák <marek.olsak@amd.com>
Sat, 14 Dec 2013 16:42:08 +0000 (17:42 +0100)
This is used for scaled resolves. The filter only gives good results for magnification.

If somebody has an idea how to implement a good filter for minification,
I'm all ears. I would probably have to use derivatives.

Reviewed-by: Brian Paul <brianp@vmware.com>
src/gallium/auxiliary/util/u_blitter.c
src/gallium/auxiliary/util/u_simple_shaders.c
src/gallium/auxiliary/util/u_simple_shaders.h

index e3faf6c028729535b28f71b8ee3dd72c10cf3286..9246bd722cac065c8a71bae00b6199909061a568 100644 (file)
@@ -95,9 +95,9 @@ struct blitter_context_priv
    void *fs_texfetch_stencil_msaa[PIPE_MAX_TEXTURE_TYPES];
 
    /* FS which outputs an average of all samples. */
-   void *fs_resolve[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS];
-   void *fs_resolve_sint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS];
-   void *fs_resolve_uint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS];
+   void *fs_resolve[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2];
+   void *fs_resolve_sint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2];
+   void *fs_resolve_uint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2];
 
    /* Blend state. */
    void *blend[PIPE_MASK_RGBA+1]; /**< blend state with writemask */
@@ -342,7 +342,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
 {
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = blitter->pipe;
-   int i, j;
+   int i, j, f;
 
    for (i = 0; i <= PIPE_MASK_RGBA; i++) {
       pipe->delete_blend_state(pipe, ctx->blend[i]);
@@ -382,16 +382,19 @@ void util_blitter_destroy(struct blitter_context *blitter)
          ctx->delete_fs_state(pipe, ctx->fs_texfetch_stencil[i]);
 
       for (j = 0; j< Elements(ctx->fs_resolve[i]); j++)
-         if (ctx->fs_resolve[i][j])
-            ctx->delete_fs_state(pipe, ctx->fs_resolve[i][j]);
+         for (f = 0; f < 2; f++)
+            if (ctx->fs_resolve[i][j][f])
+               ctx->delete_fs_state(pipe, ctx->fs_resolve[i][j][f]);
 
       for (j = 0; j< Elements(ctx->fs_resolve_sint[i]); j++)
-         if (ctx->fs_resolve_sint[i][j])
-            ctx->delete_fs_state(pipe, ctx->fs_resolve_sint[i][j]);
+         for (f = 0; f < 2; f++)
+            if (ctx->fs_resolve_sint[i][j][f])
+               ctx->delete_fs_state(pipe, ctx->fs_resolve_sint[i][j][f]);
 
       for (j = 0; j< Elements(ctx->fs_resolve_uint[i]); j++)
-         if (ctx->fs_resolve_uint[i][j])
-            ctx->delete_fs_state(pipe, ctx->fs_resolve_uint[i][j]);
+         for (f = 0; f < 2; f++)
+            if (ctx->fs_resolve_uint[i][j][f])
+               ctx->delete_fs_state(pipe, ctx->fs_resolve_uint[i][j][f]);
    }
 
    ctx->delete_fs_state(pipe, ctx->fs_empty);
@@ -750,7 +753,8 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
                                          enum pipe_format format,
                                          enum pipe_texture_target target,
                                          unsigned src_nr_samples,
-                                         unsigned dst_nr_samples)
+                                         unsigned dst_nr_samples,
+                                         unsigned filter)
 {
    struct pipe_context *pipe = ctx->base.pipe;
    unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target, src_nr_samples);
@@ -768,17 +772,26 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
          is_uint = util_format_is_pure_uint(format);
          is_sint = util_format_is_pure_sint(format);
 
+         assert(filter < 2);
+
          if (is_uint)
-            shader = &ctx->fs_resolve_uint[target][index];
+            shader = &ctx->fs_resolve_uint[target][index][filter];
          else if (is_sint)
-            shader = &ctx->fs_resolve_sint[target][index];
+            shader = &ctx->fs_resolve_sint[target][index][filter];
          else
-            shader = &ctx->fs_resolve[target][index];
+            shader = &ctx->fs_resolve[target][index][filter];
 
          if (!*shader) {
-            *shader = util_make_fs_msaa_resolve(pipe, tgsi_tex,
-                                                src_nr_samples,
-                                                is_uint, is_sint);
+            if (filter == PIPE_TEX_FILTER_LINEAR) {
+               *shader = util_make_fs_msaa_resolve_bilinear(pipe, tgsi_tex,
+                                                   src_nr_samples,
+                                                   is_uint, is_sint);
+            }
+            else {
+               *shader = util_make_fs_msaa_resolve(pipe, tgsi_tex,
+                                                   src_nr_samples,
+                                                   is_uint, is_sint);
+            }
          }
       }
       else {
@@ -925,7 +938,7 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter)
 {
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_screen *screen = blitter->pipe->screen;
-   unsigned samples, j, target, max_samples;
+   unsigned samples, j, f, target, max_samples;
    boolean has_arraytex, has_cubearraytex;
 
    max_samples = ctx->has_texture_multisample ? 2 : 1;
@@ -955,7 +968,7 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter)
           * they read one sample.
           */
          blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_FLOAT, target,
-                                     samples, samples);
+                                     samples, samples, 0);
          blitter_get_fs_texfetch_depth(ctx, target, samples);
          if (ctx->has_stencil_export) {
             blitter_get_fs_texfetch_depthstencil(ctx, target, samples);
@@ -973,12 +986,14 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter)
                continue;
             }
 
-            blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_FLOAT, target,
-                                        j, 1);
-            blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_UINT, target,
-                                        j, 1);
-            blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_SINT, target,
-                                        j, 1);
+            for (f = 0; f < 2; f++) {
+               blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_FLOAT, target,
+                                           j, 1, f);
+               blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_UINT, target,
+                                           j, 1, f);
+               blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_SINT, target,
+                                           j, 1, f);
+            }
          }
       }
    }
@@ -1362,6 +1377,12 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
       return;
    }
 
+   if (blit_stencil ||
+       (dstbox->width == abs(srcbox->width) &&
+        dstbox->height == abs(srcbox->height))) {
+      filter = PIPE_TEX_FILTER_NEAREST;
+   }
+
    /* Check whether the states are properly saved. */
    blitter_set_running_flag(ctx);
    blitter_check_saved_vertex_states(ctx);
@@ -1405,15 +1426,11 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
       ctx->bind_fs_state(pipe,
             blitter_get_fs_texfetch_col(ctx, src->format, src_target,
-                                        src_samples, dst_samples));
+                                        src_samples, dst_samples, filter));
    }
 
    /* Set the linear filter only for scaled color non-MSAA blits. */
-   if (filter == PIPE_TEX_FILTER_LINEAR &&
-       !blit_depth && !blit_stencil &&
-       src_samples <= 1 &&
-       (dstbox->width != abs(srcbox->width) ||
-        dstbox->height != abs(srcbox->height))) {
+   if (filter == PIPE_TEX_FILTER_LINEAR) {
       if (src_target == PIPE_TEXTURE_RECT) {
          sampler_state = ctx->sampler_state_rect_linear;
       } else {
index 5be3a13e8e3a11b48175a081ced6c443f10d185d..82f23ebec2539ed252f3138614c330fbffce5a6d 100644 (file)
@@ -612,3 +612,98 @@ util_make_fs_msaa_resolve(struct pipe_context *pipe,
 
    return ureg_create_shader_and_destroy(ureg, pipe);
 }
+
+/**
+ * Create a fragment shader that resolves a multisample texture and applies
+ * a bilinear filter to the resolved values.
+ *
+ * For each of the 4 texels around the interpolated coordinate, nr_samples
+ * samples are fetched with TXF and averaged; the 4 averages are then blended
+ * with LRP using the fractional part of the coordinate.
+ *
+ * \param pipe        context passed to ureg_create_shader_and_destroy
+ * \param tgsi_tex    texture target passed to the TXF instructions
+ * \param nr_samples  number of samples fetched and averaged per texel
+ * \param is_uint     if set, fetched values are converted with U2F and the
+ *                    final value with F2U
+ * \param is_sint     if set (and is_uint is not), fetched values are
+ *                    converted with I2F and the final value with F2I
+ * \return the result of ureg_create_shader_and_destroy, or NULL if
+ *         ureg_create fails
+ */
+void *
+util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
+                                   unsigned tgsi_tex, unsigned nr_samples,
+                                   boolean is_uint, boolean is_sint)
+{
+   struct ureg_program *ureg;
+   struct ureg_src sampler, coord;
+   struct ureg_dst out, tmp, top, bottom;
+   struct ureg_dst tmp_coord[4], tmp_sum[4];
+   int i, c;
+
+   ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!ureg)
+      return NULL;
+
+   /* Declarations: sampler 0, the GENERIC[0] input (linear interpolation)
+    * carrying the coordinate, the COLOR[0] output, and temporaries for the
+    * 4 per-texel sums, the 4 per-texel coordinates and the filter stages.
+    */
+   sampler = ureg_DECL_sampler(ureg, 0);
+   coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0,
+                              TGSI_INTERPOLATE_LINEAR);
+   out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+   for (c = 0; c < 4; c++)
+      tmp_sum[c] = ureg_DECL_temporary(ureg);
+   for (c = 0; c < 4; c++)
+      tmp_coord[c] = ureg_DECL_temporary(ureg);
+   tmp = ureg_DECL_temporary(ureg);
+   top = ureg_DECL_temporary(ureg);
+   bottom = ureg_DECL_temporary(ureg);
+
+   /* Instructions: start with all 4 per-texel sums cleared to zero. */
+   for (c = 0; c < 4; c++)
+      ureg_MOV(ureg, tmp_sum[c], ureg_imm1f(ureg, 0));
+
+   /* Get 4 texture coordinates for the bilinear filter.
+    * F2U converts the interpolated coordinate to integers for the first
+    * texel; the other three add 1 to X and/or Y of that result.
+    */
+   ureg_F2U(ureg, tmp_coord[0], coord); /* top-left */
+   ureg_UADD(ureg, tmp_coord[1], ureg_src(tmp_coord[0]),
+             ureg_imm4u(ureg, 1, 0, 0, 0)); /* top-right */
+   ureg_UADD(ureg, tmp_coord[2], ureg_src(tmp_coord[0]),
+             ureg_imm4u(ureg, 0, 1, 0, 0)); /* bottom-left */
+   ureg_UADD(ureg, tmp_coord[3], ureg_src(tmp_coord[0]),
+             ureg_imm4u(ureg, 1, 1, 0, 0)); /* bottom-right */
+
+   for (i = 0; i < nr_samples; i++) {
+      for (c = 0; c < 4; c++) {
+         /* Read one sample: the sample index i is written to the W
+          * component of the coordinate used by TXF.
+          */
+         ureg_MOV(ureg, ureg_writemask(tmp_coord[c], TGSI_WRITEMASK_W),
+                  ureg_imm1u(ureg, i));
+         ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tmp_coord[c]), sampler);
+
+         if (is_uint)
+            ureg_U2F(ureg, tmp, ureg_src(tmp));
+         else if (is_sint)
+            ureg_I2F(ureg, tmp, ureg_src(tmp));
+
+         /* Add it to the sum of texel c (accumulated as float). */
+         ureg_ADD(ureg, tmp_sum[c], ureg_src(tmp_sum[c]), ureg_src(tmp));
+      }
+   }
+
+   /* Calculate the average: scale each sum by 1 / nr_samples. */
+   for (c = 0; c < 4; c++)
+      ureg_MUL(ureg, tmp_sum[c], ureg_src(tmp_sum[c]),
+               ureg_imm1f(ureg, 1.0 / nr_samples));
+
+   /* Take the 4 average values and apply a standard bilinear filter.
+    * FRC yields the fractional part of the coordinate; its X component
+    * blends left/right within each row and its Y component then blends
+    * the top and bottom rows.
+    */
+   ureg_FRC(ureg, tmp, coord);
+
+   ureg_LRP(ureg, top,
+            ureg_scalar(ureg_src(tmp), 0),
+            ureg_src(tmp_sum[1]),
+            ureg_src(tmp_sum[0]));
+
+   ureg_LRP(ureg, bottom,
+            ureg_scalar(ureg_src(tmp), 0),
+            ureg_src(tmp_sum[3]),
+            ureg_src(tmp_sum[2]));
+
+   ureg_LRP(ureg, tmp,
+            ureg_scalar(ureg_src(tmp), 1),
+            ureg_src(bottom),
+            ureg_src(top));
+
+   /* Convert to the texture format and return.
+    * Integer formats get the filtered float converted back with F2U/F2I;
+    * otherwise the value is moved to the output unchanged.
+    */
+   if (is_uint)
+      ureg_F2U(ureg, out, ureg_src(tmp));
+   else if (is_sint)
+      ureg_F2I(ureg, out, ureg_src(tmp));
+   else
+      ureg_MOV(ureg, out, ureg_src(tmp));
+
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, pipe);
+}
index ea9208c2c7a54881d57e89198650166b51c5a0d4..e81d99414eb92220d0215d15df17f7125645e3fb 100644 (file)
@@ -130,6 +130,12 @@ util_make_fs_msaa_resolve(struct pipe_context *pipe,
                           unsigned tgsi_tex, unsigned nr_samples,
                           boolean is_uint, boolean is_sint);
 
+/**
+ * Create a fragment shader that averages nr_samples samples for each of the
+ * 4 texels around the input coordinate and bilinearly filters the averages.
+ * is_uint / is_sint select integer<->float conversion of the fetched and
+ * written values.
+ */
+void *
+util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
+                                   unsigned tgsi_tex, unsigned nr_samples,
+                                   boolean is_uint, boolean is_sint);
+
 #ifdef __cplusplus
 }
 #endif