From 0294eaed809fb5117c45a4c3f2e686fea4e27196 Mon Sep 17 00:00:00 2001
From: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Date: Wed, 29 Jul 2020 12:14:28 +0200
Subject: [PATCH] radeonsi: extend workaround for
 KHR-GL45.texture_view.view_classes on gfx9
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This is a follow-up to 19db1a540c248e330284a6c9733633d0695677a3.
That commit fixed KHR-GL45.texture_view.view_classes on gfx9, but the test
still failed when using AMD_DEBUG=nodma or AMD_DEBUG=nodcc,nodma.

The workaround is now used from si_resource_copy_region so it covers the
previous call site (si_texture_transfer_map) and the sctx->dma_copy
fallback code.

Fixes: 19db1a540c2 ("radeonsi: add a workaround to fix KHR-GL45.texture_view.view_classes on gfx9")
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6115>
---
 src/gallium/drivers/radeonsi/si_blit.c        | 24 +++++++++++++++++
 .../drivers/radeonsi/si_compute_blit.c        | 27 ++++++++++++++++---
 src/gallium/drivers/radeonsi/si_texture.c     | 15 -----------
 3 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index ab7c031af23..4e77e61ac86 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -824,6 +824,28 @@ struct texture_orig_info {
    unsigned npix0_y;
 };
 
+static void si_use_compute_copy_for_float_formats(struct si_context *sctx,
+                                                  struct pipe_resource *texture,
+                                                  unsigned level) {
+   struct si_texture *tex = (struct si_texture *)texture;
+
+   /* Disable DCC on float destinations so that the compute blit can be used.
+    * If we are uploading into FP16 or R11G11B10_FLOAT via a blit, CB clobbers NaNs,
+    * so in order to preserve them exactly, we have to use the compute blit.
+    * The compute blit is used only when the destination doesn't have DCC, so DCC
+    * is disabled here (a hack). 32-bit floats with DCC lose NaNs via a blit too,
+    * so the format check below is not limited to sub-32-bit floats.
+    *
+    * This makes KHR-GL45.texture_view.view_classes pass on gfx9.
+    * gfx10 has the same issue, but the test doesn't use a large enough texture
+    * to enable DCC and fail, so it always passes.
+    */
+   if (vi_dcc_enabled(tex, level) &&
+       util_format_is_float(texture->format)) {
+      si_texture_disable_dcc(sctx, tex);
+   }
+}
+
 void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst,
                              unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz,
                              struct pipe_resource *src, unsigned src_level,
@@ -844,6 +866,8 @@ void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst
       return;
    }
 
+   si_use_compute_copy_for_float_formats(sctx, dst, dst_level);
+
    if (!util_format_is_compressed(src->format) && !util_format_is_compressed(dst->format) &&
        !util_format_is_depth_or_stencil(src->format) && src->nr_samples <= 1 &&
        !vi_dcc_enabled(sdst, dst_level) &&
diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c
index a9ec6fefc7f..958fa6e9299 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -435,6 +435,29 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
 
    assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format));
 
+   if (!vi_dcc_enabled((struct si_texture*)src, src_level) &&
+       src_format == dst_format &&
+       util_format_is_float(src_format) &&
+       !util_format_is_compressed(src_format)) {
+      /* Interpret as integer values to avoid NaN issues */
+      switch(util_format_get_blocksizebits(src_format)) {
+        case 16:
+          src_format = dst_format = PIPE_FORMAT_R16_UINT;
+          break;
+        case 32:
+          src_format = dst_format = PIPE_FORMAT_R32_UINT;
+          break;
+        case 64:
+          src_format = dst_format = PIPE_FORMAT_R32G32_UINT;
+          break;
+        case 128:
+          src_format = dst_format = PIPE_FORMAT_R32G32B32A32_UINT;
+          break;
+        default:
+          assert(false);
+      }
+   }
+
    if (util_format_is_subsampled_422(src_format)) {
       src_format = dst_format = PIPE_FORMAT_R32_UINT;
       /* Interpreting 422 subsampled format (16 bpp) as 32 bpp
@@ -495,10 +518,6 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
    image[1].u.tex.last_layer = dst->target == PIPE_TEXTURE_3D ? u_minify(dst->depth0, dst_level) - 1
                                                               : (unsigned)(dst->array_size - 1);
 
-   if (sctx->chip_class < GFX10_3 &&
-       src->format == PIPE_FORMAT_R9G9B9E5_FLOAT)
-      image[0].format = image[1].format = PIPE_FORMAT_R32_UINT;
-
    /* SNORM8 blitting has precision issues on some chips. Use the SINT
     * equivalent instead, which doesn't force DCC decompression.
     * Note that some chips avoid this issue by using SDMA.
diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index 875341bf06e..01439114241 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -1626,21 +1626,6 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou
    assert(!(texture->flags & SI_RESOURCE_FLAG_FORCE_LINEAR));
    assert(box->width && box->height && box->depth);
 
-   /* If we are uploading into FP16 or R11G11B10_FLOAT via a blit, CB clobbers NaNs,
-    * so in order to preserve them exactly, we have to use the compute blit.
-    * The compute blit is used only when the destination doesn't have DCC, so
-    * disable it here, which is kinda a hack.
-    *
-    * This makes KHR-GL45.texture_view.view_classes pass on gfx9.
-    * gfx10 has the same issue, but the test doesn't use a large enough texture
-    * to enable DCC and fail, so it always passes.
-    */
-   const struct util_format_description *desc = util_format_description(texture->format);
-   if (vi_dcc_enabled(tex, level) &&
-       desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
-       desc->channel[0].size < 32)
-      si_texture_disable_dcc(sctx, tex);
-
    if (tex->is_depth) {
       /* Depth textures use staging unconditionally. */
       use_staging_texture = true;
-- 
2.30.2