radeonsi: clamp depth comparison value only for fixed point formats
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Sat, 23 Sep 2017 11:20:25 +0000 (13:20 +0200)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Fri, 29 Sep 2017 09:44:50 +0000 (11:44 +0200)
The hardware usually clamps the depth comparison value automatically.
However, we upgrade depth to Z32_FLOAT to enable TC-compatible HTILE,
which means the hardware no longer clamps the comparison value for us.

The only way to tell in the shader whether a clamp is required
seems to be to communicate an additional bit in the descriptor
table. While VI has some unused bits in the resource descriptor,
those bits have unfortunately all been used in gfx9. So we use
an unused bit in the sampler state instead.

Fixes dEQP-GLES3.functional.texture.shadow.2d.linear.equal_depth_component32f
and many other tests in dEQP-GLES3.functional.texture.shadow.*

Fixes: d4d9ec55c589 ("radeonsi: implement TC-compatible HTILE")
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
src/amd/common/sid.h
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeon/r600_texture.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
src/gallium/drivers/radeonsi/si_state.c

index a8c78c1b2bf7f1dd57ebcff1dbb39719a8f85b05..1016f67470732c106a677bd52f46bbbf2c2256a3 100644 (file)
 #define   S_008F3C_BORDER_COLOR_PTR(x)                                (((unsigned)(x) & 0xFFF) << 0)
 #define   G_008F3C_BORDER_COLOR_PTR(x)                                (((x) >> 0) & 0xFFF)
 #define   C_008F3C_BORDER_COLOR_PTR                                   0xFFFFF000
+/* The UPGRADED_DEPTH field is driver-specific and does not exist in hardware. */
+#define   S_008F3C_UPGRADED_DEPTH(x)                                  (((unsigned)(x) & 0x1) << 29)
 #define   S_008F3C_BORDER_COLOR_TYPE(x)                               (((unsigned)(x) & 0x03) << 30)
 #define   G_008F3C_BORDER_COLOR_TYPE(x)                               (((x) >> 30) & 0x03)
 #define   C_008F3C_BORDER_COLOR_TYPE                                  0x3FFFFFFF
index 1259257eeade0765f761f901c782a84ed46cf288..debedd402b429361122082dd1911a126968b776c 100644 (file)
@@ -250,6 +250,7 @@ struct r600_texture {
        float                           depth_clear_value;
        bool                            stencil_cleared; /* if it was cleared at least once */
        uint8_t                         stencil_clear_value;
+       bool                            upgraded_depth; /* upgraded from unorm to Z32_FLOAT */
 
        bool                            non_disp_tiling; /* R600-Cayman only */
 
index a9a1b2627e596e060691b8d3ee4822bd837a42c4..eb7560e2c08603fc29287bc1909d79f6ad305484 100644 (file)
@@ -1153,8 +1153,11 @@ r600_texture_create_object(struct pipe_screen *screen,
                if (rscreen->chip_class >= GFX9 &&
                    base->format == PIPE_FORMAT_Z16_UNORM)
                        rtex->db_render_format = base->format;
-               else
+               else {
                        rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
+                       rtex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT &&
+                                              base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
+               }
        } else {
                rtex->db_render_format = base->format;
        }
index 26198d8c1f3cb9c9e8e8de55c08d593d1a6bff2a..fbb72d900b19754ddd4b38e02241b8f4d39f24eb 100644 (file)
@@ -422,8 +422,13 @@ static void si_set_sampler_view_desc(struct si_context *sctx,
                /* Disable FMASK and bind sampler state in [12:15]. */
                memcpy(desc + 8, null_texture_descriptor, 4*4);
 
-               if (sstate)
-                       memcpy(desc + 12, sstate->val, 4*4);
+               if (sstate) {
+                       if (!is_buffer && rtex->upgraded_depth &&
+                           !sview->is_stencil_sampler)
+                               memcpy(desc + 12, sstate->upgraded_depth_val, 4*4);
+                       else
+                               memcpy(desc + 12, sstate->val, 4*4);
+               }
        }
 }
 
@@ -845,13 +850,25 @@ static void si_bind_sampler_states(struct pipe_context *ctx,
                /* If FMASK is bound, don't overwrite it.
                 * The sampler state will be set after FMASK is unbound.
                 */
-               if (samplers->views.views[slot] &&
-                   samplers->views.views[slot]->texture &&
-                   samplers->views.views[slot]->texture->target != PIPE_BUFFER &&
-                   ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
+               struct si_sampler_view *sview =
+                       (struct si_sampler_view *)samplers->views.views[slot];
+
+               struct r600_texture *tex = NULL;
+
+               if (sview && sview->base.texture &&
+                   sview->base.texture->target != PIPE_BUFFER)
+                       tex = (struct r600_texture *)sview->base.texture;
+
+               if (tex && tex->fmask.size)
                        continue;
 
-               memcpy(desc->list + desc_slot * 16 + 12, sstates[i]->val, 4*4);
+               if (tex && tex->upgraded_depth && !sview->is_stencil_sampler)
+                       memcpy(desc->list + desc_slot * 16 + 12,
+                              sstates[i]->upgraded_depth_val, 4*4);
+               else
+                       memcpy(desc->list + desc_slot * 16 + 12,
+                              sstates[i]->val, 4*4);
+
                sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
        }
 }
index ce6aa3be96b27898642a41cd06a3d7a4369ed9e3..80f38ea29bbabaf39981e28d019a0f8eded10f97 100644 (file)
@@ -167,6 +167,7 @@ struct si_sampler_state {
        unsigned                        magic;
 #endif
        uint32_t                        val[4];
+       uint32_t                        upgraded_depth_val[4];
 };
 
 struct si_cs_shader_state {
index 0f65984db071c11478d16d501cc600d960c81c7f..be92044750cd6864678fbab5b61c4a43df25bf01 100644 (file)
@@ -1401,15 +1401,28 @@ static void tex_fetch_args(
                        z = coords[ref_pos];
                }
 
-               /* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
+               /* Section 8.23.1 (Depth Texture Comparison Mode) of the
+                * OpenGL 4.5 spec says:
+                *
+                *    "If the texture’s internal format indicates a fixed-point
+                *     depth texture, then D_t and D_ref are clamped to the
+                *     range [0, 1]; otherwise no clamping is performed."
+                *
+                * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
                 * so the depth comparison value isn't clamped for Z16 and
                 * Z24 anymore. Do it manually here.
-                *
-                * It's unnecessary if the original texture format was
-                * Z32_FLOAT, but we don't know that here.
                 */
-               if (ctx->screen->b.chip_class >= VI)
-                       z = ac_build_clamp(&ctx->ac, z);
+               if (ctx->screen->b.chip_class >= VI) {
+                       LLVMValueRef upgraded;
+                       LLVMValueRef clamped;
+                       upgraded = LLVMBuildExtractElement(gallivm->builder, samp_ptr,
+                                                          LLVMConstInt(ctx->i32, 3, false), "");
+                       upgraded = LLVMBuildLShr(gallivm->builder, upgraded,
+                                                LLVMConstInt(ctx->i32, 29, false), "");
+                       upgraded = LLVMBuildTrunc(gallivm->builder, upgraded, ctx->i1, "");
+                       clamped = ac_build_clamp(&ctx->ac, z);
+                       z = LLVMBuildSelect(gallivm->builder, upgraded, clamped, z, "");
+               }
 
                address[count++] = z;
        }
index a468a1d35a26977cfa996faa16bc471b03879199..8a14404f0e83262b3e544029d9f6df593476891f 100644 (file)
@@ -3997,6 +3997,10 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
                          S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
        rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
                         S_008F3C_BORDER_COLOR_TYPE(border_color_type);
+
+       memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val));
+       rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1);
+
        return rstate;
 }