From: Nicolai Hähnle
Date: Sat, 23 Sep 2017 11:20:25 +0000 (+0200)
Subject: radeonsi: clamp depth comparison value only for fixed point formats
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4c56e070296be6f53bfc1a3a4c864f12c035d3a4;p=mesa.git

radeonsi: clamp depth comparison value only for fixed point formats

The hardware usually does this automatically. However, we upgrade depth to Z32_FLOAT to enable TC-compatible HTILE, which means the hardware no longer clamps the comparison value for us.

The only way to tell in the shader whether a clamp is required seems to be to communicate an additional bit in the descriptor table. While VI has some unused bits in the resource descriptor, those bits have unfortunately all been used in gfx9. So we use an unused bit in the sampler state instead.

Fixes dEQP-GLES3.functional.texture.shadow.2d.linear.equal_depth_component32f and many other tests in dEQP-GLES3.functional.texture.shadow.*

Fixes: d4d9ec55c589 ("radeonsi: implement TC-compatible HTILE")
Reviewed-by: Marek Olšák
Tested-by: Dieter Nützel
---
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h index a8c78c1b2bf..1016f674707 100644 --- a/src/amd/common/sid.h +++ b/src/amd/common/sid.h @@ -2453,6 +2453,8 @@ #define S_008F3C_BORDER_COLOR_PTR(x) (((unsigned)(x) & 0xFFF) << 0) #define G_008F3C_BORDER_COLOR_PTR(x) (((x) >> 0) & 0xFFF) #define C_008F3C_BORDER_COLOR_PTR 0xFFFFF000 +/* The UPGRADED_DEPTH field is driver-specific and does not exist in hardware. 
*/ +#define S_008F3C_UPGRADED_DEPTH(x) (((unsigned)(x) & 0x1) << 29) #define S_008F3C_BORDER_COLOR_TYPE(x) (((unsigned)(x) & 0x03) << 30) #define G_008F3C_BORDER_COLOR_TYPE(x) (((x) >> 30) & 0x03) #define C_008F3C_BORDER_COLOR_TYPE 0x3FFFFFFF diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 1259257eead..debedd402b4 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -250,6 +250,7 @@ struct r600_texture { float depth_clear_value; bool stencil_cleared; /* if it was cleared at least once */ uint8_t stencil_clear_value; + bool upgraded_depth; /* upgraded from unorm to Z32_FLOAT */ bool non_disp_tiling; /* R600-Cayman only */ diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index a9a1b2627e5..eb7560e2c08 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -1153,8 +1153,11 @@ r600_texture_create_object(struct pipe_screen *screen, if (rscreen->chip_class >= GFX9 && base->format == PIPE_FORMAT_Z16_UNORM) rtex->db_render_format = base->format; - else + else { rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT; + rtex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT && + base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT; + } } else { rtex->db_render_format = base->format; } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 26198d8c1f3..fbb72d900b1 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -422,8 +422,13 @@ static void si_set_sampler_view_desc(struct si_context *sctx, /* Disable FMASK and bind sampler state in [12:15]. 
*/ memcpy(desc + 8, null_texture_descriptor, 4*4); - if (sstate) - memcpy(desc + 12, sstate->val, 4*4); + if (sstate) { + if (!is_buffer && rtex->upgraded_depth && + !sview->is_stencil_sampler) + memcpy(desc + 12, sstate->upgraded_depth_val, 4*4); + else + memcpy(desc + 12, sstate->val, 4*4); + } } } @@ -845,13 +850,25 @@ static void si_bind_sampler_states(struct pipe_context *ctx, /* If FMASK is bound, don't overwrite it. * The sampler state will be set after FMASK is unbound. */ - if (samplers->views.views[slot] && - samplers->views.views[slot]->texture && - samplers->views.views[slot]->texture->target != PIPE_BUFFER && - ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size) + struct si_sampler_view *sview = + (struct si_sampler_view *)samplers->views.views[slot]; + + struct r600_texture *tex = NULL; + + if (sview && sview->base.texture && + sview->base.texture->target != PIPE_BUFFER) + tex = (struct r600_texture *)sview->base.texture; + + if (tex && tex->fmask.size) continue; - memcpy(desc->list + desc_slot * 16 + 12, sstates[i]->val, 4*4); + if (tex && tex->upgraded_depth && !sview->is_stencil_sampler) + memcpy(desc->list + desc_slot * 16 + 12, + sstates[i]->upgraded_depth_val, 4*4); + else + memcpy(desc->list + desc_slot * 16 + 12, + sstates[i]->val, 4*4); + sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); } } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index ce6aa3be96b..80f38ea29bb 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -167,6 +167,7 @@ struct si_sampler_state { unsigned magic; #endif uint32_t val[4]; + uint32_t upgraded_depth_val[4]; }; struct si_cs_shader_state { diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 0f65984db07..be92044750c 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -1401,15 +1401,28 @@ static void tex_fetch_args( z = coords[ref_pos]; } - /* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT, + /* Section 8.23.1 (Depth Texture Comparison Mode) of the + * OpenGL 4.5 spec says: + * + * "If the texture’s internal format indicates a fixed-point + * depth texture, then D_t and D_ref are clamped to the + * range [0, 1]; otherwise no clamping is performed." + * + * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT, * so the depth comparison value isn't clamped for Z16 and * Z24 anymore. Do it manually here. - * - * It's unnecessary if the original texture format was - * Z32_FLOAT, but we don't know that here. */ - if (ctx->screen->b.chip_class >= VI) - z = ac_build_clamp(&ctx->ac, z); + if (ctx->screen->b.chip_class >= VI) { + LLVMValueRef upgraded; + LLVMValueRef clamped; + upgraded = LLVMBuildExtractElement(gallivm->builder, samp_ptr, + LLVMConstInt(ctx->i32, 3, false), ""); + upgraded = LLVMBuildLShr(gallivm->builder, upgraded, + LLVMConstInt(ctx->i32, 29, false), ""); + upgraded = LLVMBuildTrunc(gallivm->builder, upgraded, ctx->i1, ""); + clamped = ac_build_clamp(&ctx->ac, z); + z = LLVMBuildSelect(gallivm->builder, upgraded, clamped, z, ""); + } address[count++] = z; } diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index a468a1d35a2..8a14404f0e8 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3997,6 +3997,10 @@ static void *si_create_sampler_state(struct pipe_context *ctx, S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI)); rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) | S_008F3C_BORDER_COLOR_TYPE(border_color_type); + + memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val)); + rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1); + return rstate; }