radv: set cb base tile swizzles for MRT speedups (v4)
author: Dave Airlie <airlied@redhat.com>
Fri, 7 Jul 2017 05:56:57 +0000 (06:56 +0100)
committer: Dave Airlie <airlied@redhat.com>
Mon, 17 Jul 2017 00:43:41 +0000 (01:43 +0100)
This patch uses addrlib to work out the tile swizzles according
to the surface index. It seems to produce the same values as
amdgpu-pro for the deferred test.

v2: don't apply swizzle to CMASK. The eg docs don't mention
it, and we clearly don't align CMASK for that.
v3: disable surf index for dedicated images, as these will
most likely be shared, and I don't think the metadata has
space for this info in it yet.
v4: update for shareable images, rename combined_swizzle
to tile_swizzle

This gets the deferred demo from 730->950fps on my rx480.
(dcc cmask elim predication patches get it further)

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_surface.c
src/amd/common/ac_surface.h
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_image.c
src/amd/vulkan/radv_private.h

index a4df595653b4e63322ca8863dad1230b0c77379b..1677d1b3155f4c1e3ec259baf281b1127cf4a358 100644 (file)
@@ -692,6 +692,20 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
                surf->htile_size *= 2;
 
        surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+       /* workout base swizzle */
+       if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
+               ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
+               ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
+
+               AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
+               AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
+               AddrBaseSwizzleIn.macroModeIndex = AddrSurfInfoOut.macroModeIndex;
+               AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
+               AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
+               AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
+               surf->u.legacy.tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
+       }
        return 0;
 }
 
index 4d893ff5009e372b8525eb321b68a836b0dd4321..3eaef639aad61e06b7f36dfca5e69e71a6841c2c 100644 (file)
@@ -97,6 +97,7 @@ struct legacy_surf_layout {
     unsigned                    depth_adjusted:1;
     unsigned                    stencil_adjusted:1;
 
+    uint8_t                     tile_swizzle;
     struct legacy_surf_level    level[RADEON_SURF_MAX_LEVELS];
     struct legacy_surf_level    stencil_level[RADEON_SURF_MAX_LEVELS];
     uint8_t                     tiling_index[RADEON_SURF_MAX_LEVELS];
@@ -194,6 +195,7 @@ struct ac_surf_info {
        uint32_t width;
        uint32_t height;
        uint32_t depth;
+       uint32_t surf_index;
        uint8_t samples;
        uint8_t levels;
        uint16_t array_size;
index 2670d47fdb817314cc3d4be057e72c534cec6933..3b405838f391e513e60c0159b4f34a34ff284a95 100644 (file)
@@ -2814,7 +2814,8 @@ radv_initialise_color_surface(struct radv_device *device,
        }
 
        cb->cb_color_base = va >> 8;
-
+       if (device->physical_device->rad_info.chip_class < GFX9)
+               cb->cb_color_base |= iview->image->surface.u.legacy.tile_swizzle;
        /* CMASK variables */
        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
        va += iview->image->cmask.offset;
@@ -2823,6 +2824,8 @@ radv_initialise_color_surface(struct radv_device *device,
        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
        va += iview->image->dcc_offset;
        cb->cb_dcc_base = va >> 8;
+       if (device->physical_device->rad_info.chip_class < GFX9)
+               cb->cb_dcc_base |= iview->image->surface.u.legacy.tile_swizzle;
 
        uint32_t max_slice = radv_surface_layer_count(iview);
        cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
@@ -2838,6 +2841,8 @@ radv_initialise_color_surface(struct radv_device *device,
        if (iview->image->fmask.size) {
                va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
                cb->cb_color_fmask = va >> 8;
+               if (device->physical_device->rad_info.chip_class < GFX9)
+                       cb->cb_color_fmask |= iview->image->surface.u.legacy.tile_swizzle;
        } else {
                cb->cb_color_fmask = cb->cb_color_base;
        }
index 17ee74b5f5b31dd8b7d7ed903955bc180fafc5b0..a8af4fd6d6860f3b6c1643650043190d73720ecc 100644 (file)
@@ -32,6 +32,7 @@
 #include "sid.h"
 #include "gfx9d.h"
 #include "util/debug.h"
+#include "util/u_atomic.h"
 static unsigned
 radv_choose_tiling(struct radv_device *Device,
                   const struct radv_image_create_info *create_info)
@@ -210,6 +211,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
                va += base_level_info->offset;
 
        state[0] = va >> 8;
+       if (chip_class < GFX9)
+               state[0] |= image->surface.u.legacy.tile_swizzle;
        state[1] &= C_008F14_BASE_ADDRESS_HI;
        state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
        state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level,
@@ -225,7 +228,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
                                meta_va += base_level_info->dcc_offset;
                        state[6] |= S_008F28_COMPRESSION_EN(1);
                        state[7] = meta_va >> 8;
-
+                       if (chip_class < GFX9)
+                               state[7] |= image->surface.u.legacy.tile_swizzle;
                }
        }
 
@@ -473,6 +477,8 @@ si_make_texture_descriptor(struct radv_device *device,
                }
 
                fmask_state[0] = va >> 8;
+               if (device->physical_device->rad_info.chip_class < GFX9)
+                       fmask_state[0] |= image->surface.u.legacy.tile_swizzle;
                fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
                        S_008F14_DATA_FORMAT_GFX6(fmask_format) |
                        S_008F14_NUM_FORMAT_GFX6(num_format);
@@ -792,6 +798,9 @@ radv_image_create(VkDevice _device,
 
        image->shareable = vk_find_struct_const(pCreateInfo->pNext,
                                                EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL;
+       if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
+               image->info.surf_index = p_atomic_inc_return(&device->image_mrt_offset_counter) - 1;
+       }
 
        radv_init_surface(device, &image->surface, create_info);
 
index 2f633b4dbd95451dce2c1f72dd5e1a944cf3de29..e1fb55654946d7b8b8c5ee5a87564eb92d11cecf 100644 (file)
@@ -547,6 +547,8 @@ struct radv_device {
 
        /* Backup in-memory cache to be used if the app doesn't provide one */
        struct radv_pipeline_cache *                mem_cache;
+
+       uint32_t image_mrt_offset_counter;
 };
 
 struct radv_device_memory {