gallium/u_blitter: accelerate depth-stencil copying using shader stencil export
author Marek Olšák <maraeo@gmail.com>
Sat, 7 Jul 2012 21:48:37 +0000 (23:48 +0200)
committer Marek Olšák <maraeo@gmail.com>
Thu, 12 Jul 2012 00:08:30 +0000 (02:08 +0200)
This fixes stencil buffer write transfers on r600g.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
src/gallium/auxiliary/util/u_blitter.c
src/gallium/auxiliary/util/u_blitter.h
src/gallium/auxiliary/util/u_simple_shaders.c
src/gallium/auxiliary/util/u_simple_shaders.h

index 99bde548e57bc9e0ec243aa547c271029a81851d..b422ff8faedfc0edc5d9b297f97aa7009436313b 100644 (file)
@@ -78,6 +78,7 @@ struct blitter_context_priv
    /* FS which outputs a depth from a texture,
       where the index is PIPE_TEXTURE_* to be sampled. */
    void *fs_texfetch_depth[PIPE_MAX_TEXTURE_TYPES];
+   void *fs_texfetch_depthstencil[PIPE_MAX_TEXTURE_TYPES];
 
    /* Blend state. */
    void *blend_write_color;   /**< blend state with writemask of RGBA */
@@ -112,6 +113,7 @@ struct blitter_context_priv
    boolean has_geometry_shader;
    boolean vertex_has_integers;
    boolean has_stream_out;
+   boolean has_stencil_export;
 };
 
 static void blitter_draw_rectangle(struct blitter_context *blitter,
@@ -163,6 +165,10 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
       pipe->screen->get_param(pipe->screen,
                               PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0;
 
+   ctx->has_stencil_export =
+         pipe->screen->get_param(pipe->screen,
+                                 PIPE_CAP_SHADER_STENCIL_EXPORT);
+
    /* blend state objects */
    memset(&blend, 0, sizeof(blend));
    ctx->blend_keep_color = pipe->create_blend_state(pipe, &blend);
@@ -314,6 +320,8 @@ void util_blitter_destroy(struct blitter_context *blitter)
          pipe->delete_fs_state(pipe, ctx->fs_texfetch_col[i]);
       if (ctx->fs_texfetch_depth[i])
          pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]);
+      if (ctx->fs_texfetch_depthstencil[i])
+         pipe->delete_fs_state(pipe, ctx->fs_texfetch_depthstencil[i]);
    }
 
    for (i = 0; i <= PIPE_MAX_COLOR_BUFS; i++) {
@@ -718,6 +726,26 @@ void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
    return ctx->fs_texfetch_depth[tex_target];
 }
 
+static INLINE /* returns the per-target depth+stencil texfetch FS */
+void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx,
+                                           unsigned tex_target)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+
+   assert(tex_target < PIPE_MAX_TEXTURE_TYPES); /* tex_target indexes the cache */
+
+   /* Create the depth+stencil fragment shader on demand and cache it in ctx. */
+   if (!ctx->fs_texfetch_depthstencil[tex_target]) {
+      unsigned tgsi_tex = pipe_tex_to_tgsi_tex(tex_target);
+
+      ctx->fs_texfetch_depthstencil[tex_target] =
+         util_make_fragment_tex_shader_writedepthstencil(pipe, tgsi_tex,
+                                                  TGSI_INTERPOLATE_LINEAR);
+   }
+
+   return ctx->fs_texfetch_depthstencil[tex_target]; /* NULL if creation failed */
+}
+
 static void blitter_set_common_draw_rect_state(struct blitter_context_priv *ctx)
 {
    struct pipe_context *pipe = ctx->base.pipe;
@@ -916,6 +944,8 @@ void util_blitter_copy_texture(struct blitter_context *blitter,
    struct pipe_sampler_view src_templ, *src_view;
    unsigned bind;
    boolean is_stencil, is_depth;
+   const struct util_format_description *src_desc =
+         util_format_description(src->format);
 
    /* Give up if textures are not set. */
    assert(dst && src);
@@ -925,8 +955,8 @@ void util_blitter_copy_texture(struct blitter_context *blitter,
    assert(src->target < PIPE_MAX_TEXTURE_TYPES);
 
    /* Is this a ZS format? */
-   is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
-   is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0;
+   is_depth = util_format_has_depth(src_desc);
+   is_stencil = util_format_has_stencil(src_desc);
 
    if (is_depth || is_stencil)
       bind = PIPE_BIND_DEPTH_STENCIL;
@@ -935,7 +965,7 @@ void util_blitter_copy_texture(struct blitter_context *blitter,
 
    /* Check if we can sample from and render to the surfaces. */
    /* (assuming copying a stencil buffer is not possible) */
-   if ((!ignore_stencil && is_stencil) ||
+   if ((!ignore_stencil && is_stencil && !ctx->has_stencil_export) ||
        !screen->is_format_supported(screen, dst->format, dst->target,
                                     dst->nr_samples, bind) ||
        !screen->is_format_supported(screen, src->format, src->target,
@@ -976,6 +1006,21 @@ void util_blitter_copy_texture_view(struct blitter_context *blitter,
    enum pipe_texture_target src_target = src->texture->target;
    unsigned width = srcbox->width;
    unsigned height = srcbox->height;
+   boolean is_stencil, is_depth;
+   const struct util_format_description *src_desc =
+         util_format_description(src->format);
+
+   is_depth = util_format_has_depth(src_desc);
+   is_stencil = util_format_has_stencil(src_desc);
+
+   /* If you want a fallback for stencil copies,
+    * use util_blitter_copy_texture. */
+   if (is_stencil && !ctx->has_stencil_export) {
+      is_stencil = FALSE;
+
+      if (!is_depth)
+         return;
+   }
 
    /* Sanity checks. */
    if (dst->texture == src->texture &&
@@ -997,12 +1042,22 @@ void util_blitter_copy_texture_view(struct blitter_context *blitter,
    fb_state.width = dst->width;
    fb_state.height = dst->height;
 
-   if (util_format_is_depth_or_stencil(dst->format)) {
+   if (is_depth || is_stencil) {
       pipe->bind_blend_state(pipe, ctx->blend_keep_color);
-      pipe->bind_depth_stencil_alpha_state(pipe,
-                                           ctx->dsa_write_depth_keep_stencil);
-      pipe->bind_fs_state(pipe,
-            blitter_get_fs_texfetch_depth(ctx, src_target));
+
+      if (is_depth && is_stencil) {
+         pipe->bind_depth_stencil_alpha_state(pipe,
+                                              ctx->dsa_write_depth_stencil);
+         pipe->bind_fs_state(pipe,
+               blitter_get_fs_texfetch_depthstencil(ctx, src_target));
+      } else if (is_depth) {
+         pipe->bind_depth_stencil_alpha_state(pipe,
+                                              ctx->dsa_write_depth_keep_stencil);
+         pipe->bind_fs_state(pipe,
+               blitter_get_fs_texfetch_depth(ctx, src_target));
+      } else { /* is_stencil */
+         assert(0);
+      }
 
       fb_state.nr_cbufs = 0;
       fb_state.zsbuf = dst;
@@ -1017,9 +1072,29 @@ void util_blitter_copy_texture_view(struct blitter_context *blitter,
       fb_state.zsbuf = 0;
    }
 
-   pipe->bind_fragment_sampler_states(pipe, 1, &ctx->sampler_state);
+   if (is_depth && is_stencil) {
+      /* Setup two samplers, one for depth and the other one for stencil. */
+      struct pipe_sampler_view templ;
+      struct pipe_sampler_view *views[2];
+      void *samplers[2] = {ctx->sampler_state, ctx->sampler_state};
+
+      templ = *src;
+      templ.format = util_format_stencil_only(templ.format);
+      assert(templ.format != PIPE_FORMAT_NONE);
+
+      views[0] = src;
+      views[1] = pipe->create_sampler_view(pipe, src->texture, &templ);
+
+      pipe->set_fragment_sampler_views(pipe, 2, views);
+      pipe->bind_fragment_sampler_states(pipe, 2, samplers);
+
+      pipe_sampler_view_reference(&views[1], NULL);
+   } else {
+      pipe->set_fragment_sampler_views(pipe, 1, &src);
+      pipe->bind_fragment_sampler_states(pipe, 1, &ctx->sampler_state);
+   }
+
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
-   pipe->set_fragment_sampler_views(pipe, 1, &src);
    pipe->set_framebuffer_state(pipe, &fb_state);
 
    blitter_set_common_draw_rect_state(ctx);
index d4d30852b5849c09166a5e43a50ee278eff81d35..2db984c8a901ffc73eafb458543195cd61731b93 100644 (file)
@@ -169,8 +169,8 @@ void util_blitter_clear_depth_custom(struct blitter_context *blitter,
  * The same holds for depth-stencil formats with the exception that stencil
  * cannot be copied unless you set ignore_stencil to FALSE. In that case,
  * a software fallback path is taken and both surfaces must be of the same
- * format.
- * XXX implement hw-accel stencil copy using shader stencil export.
+ * format. If the shader stencil export is supported, stencil copy is always
+ * accelerated.
  *
  * Use pipe_screen->is_format_supported to know your options.
  *
index 320c0f7a830247edbf510b230100dbd09c04b9ec..545b60786675a762e0dc6a27758b87dec6363a7b 100644 (file)
@@ -210,6 +210,60 @@ util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe,
 }
 
 
+/**
+ * Make a simple fragment shader which samples texture units 0 and 1 and
+ * writes them as depth and stencil, respectively (NULL if ureg_create fails).
+ */
+void *
+util_make_fragment_tex_shader_writedepthstencil(struct pipe_context *pipe,
+                                                unsigned tex_target,
+                                                unsigned interp_mode)
+{
+   struct ureg_program *ureg;
+   struct ureg_src depth_sampler, stencil_sampler;
+   struct ureg_src tex;
+   struct ureg_dst out, depth, stencil;
+   struct ureg_src imm;
+
+   ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
+   if (ureg == NULL)
+      return NULL;
+
+   depth_sampler = ureg_DECL_sampler( ureg, 0 );   /* sampler 0: depth */
+   stencil_sampler = ureg_DECL_sampler( ureg, 1 ); /* sampler 1: stencil */
+
+   tex = ureg_DECL_fs_input( ureg,
+                             TGSI_SEMANTIC_GENERIC, 0,
+                             interp_mode );
+
+   out = ureg_DECL_output( ureg,
+                           TGSI_SEMANTIC_COLOR,
+                           0 );
+
+   depth = ureg_DECL_output( ureg,
+                             TGSI_SEMANTIC_POSITION,
+                             0 );
+
+   stencil = ureg_DECL_output( ureg,
+                             TGSI_SEMANTIC_STENCIL,
+                             0 );
+
+   imm = ureg_imm4f( ureg, 0, 0, 0, 1 );
+   /* The color output only receives the constant (0, 0, 0, 1). */
+   ureg_MOV( ureg, out, imm );
+   /* Both fetches use the same coordinate: depth -> POSITION.z, stencil -> STENCIL.y. */
+   ureg_TEX( ureg,
+             ureg_writemask(depth, TGSI_WRITEMASK_Z),
+             tex_target, tex, depth_sampler );
+   ureg_TEX( ureg,
+             ureg_writemask(stencil, TGSI_WRITEMASK_Y),
+             tex_target, tex, stencil_sampler );
+   ureg_END( ureg );
+
+   return ureg_create_shader_and_destroy( ureg, pipe );
+}
+
+
 /**
  * Make simple fragment color pass-through shader.
  */
index 5f31b72c4d77b70d172584bc81b3da3906192ddc..54d0efe63d7857cb478ed7f12064cdd286bcf531 100644 (file)
@@ -74,6 +74,12 @@ util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe,
                                          unsigned interp_mode);
 
 
+extern void *
+util_make_fragment_tex_shader_writedepthstencil(struct pipe_context *pipe,
+                                                unsigned tex_target,
+                                                unsigned interp_mode);
+
+
 extern void *
 util_make_fragment_passthrough_shader(struct pipe_context *pipe);