radv,vulkan: add a new x11 wsi drirc workaround for DOOM Eternal
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 1 Jul 2020 06:06:09 +0000 (08:06 +0200)
committerMarge Bot <eric+marge@anholt.net>
Thu, 2 Jul 2020 08:31:57 +0000 (08:31 +0000)
DOOM Eternal happily creates a swapchain with 2 images for IMMEDIATE.
This fixes a 10% performance issue with RADV.

Cc: 20.1 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5704>

src/amd/vulkan/radv_device.c
src/util/00-mesa-defaults.conf
src/util/driconf.h
src/vulkan/wsi/wsi_common.h
src/vulkan/wsi/wsi_common_x11.c

index e83241622963bea368eacd40b9b41a5e10a10a5b..6bec8f4a675b3a9940cbe80c8d8ac7cda8970a75 100644 (file)
@@ -586,6 +586,7 @@ DRI_CONF_BEGIN
                DRI_CONF_ADAPTIVE_SYNC("true")
                DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
                DRI_CONF_VK_X11_STRICT_IMAGE_COUNT("false")
+               DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT("false")
                DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING("false")
                DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP("false")
                DRI_CONF_RADV_NO_DYNAMIC_BOUNDS("false")
index b74118160db8b0b7c53f2ff3eb0fbbd0184ab1d9..ddc65a21fe85c4bd260995458ce9e21e9cf9cdce 100644 (file)
@@ -593,6 +593,12 @@ TODO: document the other workarounds.
         <application name="DOOM" executable="DOOMx64vk.exe">
             <option name="vk_x11_strict_image_count" value="true" />
         </application>
+
+        <!-- DOOM Doom Eternal happily creates a swapchain with 2 images for
+             IMMEDIATE. This fixes a 10% performance issue with RADV. -->
+        <application name="DOOMEternal" executable="DOOMEternalx64vk.exe">
+            <option name="vk_x11_ensure_min_image_count" value="true" />
+        </application>
     </device>
     <!-- vmwgfx doesn't like full buffer swaps and can't sync to vertical retraces.-->
     <device driver="vmwgfx">
index b807e669ce2902cb003764a2904c9c13a94694b9..909768c1c3253508459796c2482015400813a24c 100644 (file)
@@ -307,6 +307,11 @@ DRI_CONF_OPT_BEGIN_B(vk_x11_strict_image_count, def) \
         DRI_CONF_DESC("Force the X11 WSI to create exactly the number of image specified by the application in VkSwapchainCreateInfoKHR::minImageCount") \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(def) \
+DRI_CONF_OPT_BEGIN_B(vk_x11_ensure_min_image_count, def) \
+        DRI_CONF_DESC("Force the X11 WSI to create at least the number of image specified by the driver in VkSurfaceCapabilitiesKHR::minImageCount") \
+DRI_CONF_OPT_END
+
 #define DRI_CONF_MESA_GLTHREAD(def) \
 DRI_CONF_OPT_BEGIN_B(mesa_glthread, def) \
         DRI_CONF_DESC("Enable offloading GL driver work to a separate thread") \
index 99ccdf84cde30e09989dc5de89da7aa810d9a13d..cbf7e1be14e59474770275b4c8ceea5c9ec0c604 100644 (file)
@@ -109,6 +109,11 @@ struct wsi_device {
        * provided VkSwapchainCreateInfoKH::RminImageCount.
        */
       bool strict_imageCount;
+
+      /* Ensures to create at least the number of image specified by the
+       * driver in VkSurfaceCapabilitiesKHR::minImageCount.
+       */
+      bool ensure_minImageCount;
    } x11;
 
    /* Signals the semaphore such that any wait on the semaphore will wait on
index fd275ce7930dd420102d963d82ddad48aa705e6b..bebeabfb9ebad1538d122da77dd28f5cea9d76a7 100644 (file)
@@ -450,6 +450,33 @@ x11_surface_get_support(VkIcdSurfaceBase *icd_surface,
    return VK_SUCCESS;
 }
 
+static uint32_t
+x11_get_min_image_count(struct wsi_device *wsi_device)
+{
+   if (wsi_device->x11.override_minImageCount)
+      return wsi_device->x11.override_minImageCount;
+
+   /* For IMMEDIATE and FIFO, most games work in a pipelined manner where the
+    * can produce frames at a rate of 1/MAX(CPU duration, GPU duration), but
+    * the render latency is CPU duration + GPU duration.
+    *
+    * This means that with scanout from pageflipping we need 3 frames to run
+    * full speed:
+    * 1) CPU rendering work
+    * 2) GPU rendering work
+    * 3) scanout
+    *
+    * Once we have a nonblocking acquire that returns a semaphore we can merge
+    * 1 and 3. Hence the ideal implementation needs only 2 images, but games
+    * cannot tellwe currently do not have an ideal implementation and that
+    * hence they need to allocate 3 images. So let us do it for them.
+    *
+    * This is a tradeoff as it uses more memory than needed for non-fullscreen
+    * and non-performance intensive applications.
+    */
+   return 3;
+}
+
 static VkResult
 x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
                              struct wsi_device *wsi_device,
@@ -502,31 +529,10 @@ x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
                                       VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
    }
 
-   /* For IMMEDIATE and FIFO, most games work in a pipelined manner where the
-    * can produce frames at a rate of 1/MAX(CPU duration, GPU duration), but
-    * the render latency is CPU duration + GPU duration.
-    *
-    * This means that with scanout from pageflipping we need 3 frames to run
-    * full speed:
-    * 1) CPU rendering work
-    * 2) GPU rendering work
-    * 3) scanout
-    *
-    * Once we have a nonblocking acquire that returns a semaphore we can merge
-    * 1 and 3. Hence the ideal implementation needs only 2 images, but games
-    * cannot tellwe currently do not have an ideal implementation and that
-    * hence they need to allocate 3 images. So let us do it for them.
-    *
-    * This is a tradeoff as it uses more memory than needed for non-fullscreen
-    * and non-performance intensive applications.
-    */
-   caps->minImageCount = 3;
+   caps->minImageCount = x11_get_min_image_count(wsi_device);
    /* There is no real maximum */
    caps->maxImageCount = 0;
 
-   if (wsi_device->x11.override_minImageCount)
-      caps->minImageCount = wsi_device->x11.override_minImageCount;
-
    caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
    caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
    caps->maxImageArrayLayers = 1;
@@ -1440,6 +1446,8 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
       num_images = pCreateInfo->minImageCount;
    else if (present_mode == VK_PRESENT_MODE_MAILBOX_KHR)
       num_images = MAX2(num_images, 5);
+   else if (wsi_device->x11.ensure_minImageCount)
+      num_images = MAX2(num_images, x11_get_min_image_count(wsi_device));
 
    xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
    struct wsi_x11_connection *wsi_conn =
@@ -1659,6 +1667,11 @@ wsi_x11_init_wsi(struct wsi_device *wsi_device,
          wsi_device->x11.strict_imageCount =
             driQueryOptionb(dri_options, "vk_x11_strict_image_count");
       }
+      if (driCheckOption(dri_options, "vk_x11_ensure_min_image_count", DRI_BOOL)) {
+         wsi_device->x11.ensure_minImageCount =
+            driQueryOptionb(dri_options, "vk_x11_ensure_min_image_count");
+      }
+
    }
 
    wsi->base.get_support = x11_surface_get_support;