This is a complete rewrite of my previous rfc patches.
This adds the ability to present to a different GPU than the one doing the
rendering, using a driver-side operation that copies from the tiled image
to a linear shared image.
This does prime support completely in the swapchain present code,
and there is a precreated command buffer for each image
and for each queue family. This means presenting should work
on graphics and compute queues, and on transfer queues in the future.
v1.1: initialise needs_linear_copy in swapchain.
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tested-by: Mike Lothian <mike@fireburn.co.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>
uint32_t region_count,
const VkImageResolve *regions);
+void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image,
+ struct radv_image *linear_image);
#ifdef __cplusplus
}
#endif
meta_copy_image(cmd_buffer, src_image, dest_image,
regionCount, pRegions);
}
+
+/*
+ * Record a single full-surface colour copy from `image` into `linear_image`
+ * on `cmd_buffer`.
+ *
+ * The copy region starts at the zero-initialised offsets / mip level / base
+ * layer, covers one layer, and spans image->extent.width x
+ * image->extent.height with a depth of 1.
+ */
+void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
+			       struct radv_image *image,
+			       struct radv_image *linear_image)
+{
+	/* Members not named here are zero, as with the original `{ 0 }`. */
+	struct VkImageCopy region = {
+		.srcSubresource = {
+			.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+			.layerCount = 1,
+		},
+		.dstSubresource = {
+			.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+			.layerCount = 1,
+		},
+		.extent = {
+			.width = image->extent.width,
+			.height = image->extent.height,
+			.depth = 1,
+		},
+	};
+
+	meta_copy_image(cmd_buffer, image, linear_image, 1, &region);
+}
*/
#include "radv_private.h"
+#include "radv_meta.h"
#include "wsi_common.h"
static const struct wsi_callbacks wsi_cbs = {
return iface->get_support(surface, &device->wsi_device,
&device->instance->alloc,
- queueFamilyIndex, device->local_fd, pSupported);
+ queueFamilyIndex, device->local_fd, true, pSupported);
}
VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
radv_wsi_image_create(VkDevice device_h,
const VkSwapchainCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks* pAllocator,
+ bool needs_linear_copy,
+ bool linear,
VkImage *image_p,
VkDeviceMemory *memory_p,
uint32_t *size,
.arrayLayers = 1,
.samples = 1,
/* FIXME: Need a way to use X tiling to allow scanout */
- .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .tiling = linear ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.flags = 0,
},
return result;
image = radv_image_from_handle(image_h);
-
VkDeviceMemory memory_h;
struct radv_device_memory *memory;
+
result = radv_AllocateMemory(device_h,
&(VkMemoryAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = image->size,
- .memoryTypeIndex = 0,
+ .memoryTypeIndex = linear ? 1 : 0,
},
NULL /* XXX: pAllocator */,
&memory_h);
radv_BindImageMemory(VK_NULL_HANDLE, image_h, memory_h, 0);
- bret = device->ws->buffer_get_fd(device->ws,
- memory->bo, &fd);
- if (bret == false)
- goto fail_alloc_memory;
+ /*
+ * return the fd for the image in the no copy mode,
+ * or the fd for the linear image if a copy is required.
+ */
+ if (!needs_linear_copy || (needs_linear_copy && linear)) {
+ bret = device->ws->buffer_get_fd(device->ws,
+ memory->bo, &fd);
+ if (bret == false)
+ goto fail_alloc_memory;
+ *fd_p = fd;
+ }
{
struct radeon_bo_metadata metadata;
radv_init_metadata(device, image, &metadata);
device->ws->buffer_set_metadata(memory->bo, &metadata);
}
+
surface = &image->surface;
*image_p = image_h;
*memory_p = memory_h;
- *fd_p = fd;
*size = image->size;
*offset = image->offset;
*row_pitch = surface->level[0].pitch_bytes;
.free_wsi_image = radv_wsi_image_free,
};
+#define NUM_PRIME_POOLS RADV_QUEUE_TRANSFER
+/*
+ * Tear down the prime copy state attached to a swapchain: the command
+ * buffers of every queue-family pool, the pools themselves, and the array
+ * that stores the command buffer handles.
+ *
+ * Pools that are still VK_NULL_HANDLE are skipped, so this is safe to call
+ * from the failure path of radv_wsi_create_prime_command_buffers(), which
+ * zeroes cmd_pools before creating anything.
+ *
+ * device:    device owning the pools.
+ * swapchain: swapchain whose cmd_pools / cmd_buffers are released; both are
+ *            reset to null on return.
+ */
+static void
+radv_wsi_free_prime_command_buffers(struct radv_device *device,
+				    struct wsi_swapchain *swapchain)
+{
+	const int num_pools = NUM_PRIME_POOLS;
+	const int num_images = swapchain->image_count;
+	VkDevice device_h = radv_device_to_handle(device);
+	int i;
+
+	for (i = 0; i < num_pools; i++) {
+		/* Never created (or already destroyed): nothing to release. */
+		if (swapchain->cmd_pools[i] == VK_NULL_HANDLE)
+			continue;
+
+		/* Buffers for pool i occupy the slice [i * num_images, (i + 1) * num_images). */
+		radv_FreeCommandBuffers(device_h,
+					swapchain->cmd_pools[i],
+					num_images,
+					&swapchain->cmd_buffers[i * num_images]);
+
+		radv_DestroyCommandPool(device_h,
+					swapchain->cmd_pools[i],
+					&swapchain->alloc);
+		swapchain->cmd_pools[i] = VK_NULL_HANDLE;
+	}
+
+	/*
+	 * The handle array itself was obtained with vk_alloc() and was
+	 * previously leaked. NOTE(review): freed through swapchain->alloc, the
+	 * same allocator already used for the pools above - confirm it matches
+	 * the `alloc` passed to the create function.
+	 */
+	vk_free(&swapchain->alloc, swapchain->cmd_buffers);
+	swapchain->cmd_buffers = NULL;
+}
+
+/*
+ * Pre-record the copy command buffers used when presenting through a linear
+ * image.
+ *
+ * One command pool is created per queue family index in
+ * [0, NUM_PRIME_POOLS), and from each pool one primary command buffer per
+ * swapchain image is allocated. Each buffer records a single
+ * radv_blit_to_prime_linear() from the swapchain image to its linear
+ * counterpart. Handles are stored in swapchain->cmd_buffers at index
+ * (queue_family * image_count + image_index).
+ *
+ * device:    device to create the pools and buffers on.
+ * alloc:     allocator for the handle array and the command pools.
+ * swapchain: swapchain providing image_count and get_image_and_linear();
+ *            receives cmd_pools and cmd_buffers.
+ *
+ * Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY if the handle array cannot
+ * be allocated, or the first error reported while creating a pool,
+ * allocating buffers, or beginning/ending a recording. On any failure after
+ * the array allocation, radv_wsi_free_prime_command_buffers() is invoked
+ * before returning.
+ */
+static VkResult
+radv_wsi_create_prime_command_buffers(struct radv_device *device,
+				      const VkAllocationCallbacks *alloc,
+				      struct wsi_swapchain *swapchain)
+{
+	const int num_pools = NUM_PRIME_POOLS;
+	const int num_images = swapchain->image_count;
+	int num_cmd_buffers = num_images * num_pools; //TODO bump to MAX_QUEUE_FAMILIES
+	VkDevice device_h = radv_device_to_handle(device);
+	VkResult result;
+	int i, j;
+
+	swapchain->cmd_buffers = vk_alloc(alloc, (sizeof(VkCommandBuffer) * num_cmd_buffers), 8,
+					  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+	if (!swapchain->cmd_buffers)
+		return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+	/* Zero both tables so the failure path can tell what was created. */
+	memset(swapchain->cmd_buffers, 0, sizeof(VkCommandBuffer) * num_cmd_buffers);
+	memset(swapchain->cmd_pools, 0, sizeof(VkCommandPool) * num_pools);
+
+	for (i = 0; i < num_pools; i++) {
+		/* The pool index doubles as the queue family index. */
+		const VkCommandPoolCreateInfo pool_create_info = {
+			.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+			.pNext = NULL,
+			.flags = 0,
+			.queueFamilyIndex = i,
+		};
+
+		result = radv_CreateCommandPool(device_h,
+						&pool_create_info, alloc,
+						&swapchain->cmd_pools[i]);
+		if (result != VK_SUCCESS)
+			goto fail;
+
+		const VkCommandBufferAllocateInfo cmd_buffer_info = {
+			.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+			.pNext = NULL,
+			.commandPool = swapchain->cmd_pools[i],
+			.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+			.commandBufferCount = num_images,
+		};
+
+		result = radv_AllocateCommandBuffers(device_h,
+						     &cmd_buffer_info,
+						     &swapchain->cmd_buffers[i * num_images]);
+		if (result != VK_SUCCESS)
+			goto fail;
+
+		for (j = 0; j < num_images; j++) {
+			VkImage image, linear_image;
+			int idx = (i * num_images) + j;
+			const VkCommandBufferBeginInfo begin_info = {
+				.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+			};
+
+			swapchain->get_image_and_linear(swapchain, j, &image, &linear_image);
+
+			/*
+			 * A buffer whose recording failed must not be kept for
+			 * submission at present time, so propagate errors from
+			 * begin/end instead of ignoring them.
+			 */
+			result = radv_BeginCommandBuffer(swapchain->cmd_buffers[idx], &begin_info);
+			if (result != VK_SUCCESS)
+				goto fail;
+
+			radv_blit_to_prime_linear(radv_cmd_buffer_from_handle(swapchain->cmd_buffers[idx]),
+						  radv_image_from_handle(image),
+						  radv_image_from_handle(linear_image));
+
+			result = radv_EndCommandBuffer(swapchain->cmd_buffers[idx]);
+			if (result != VK_SUCCESS)
+				goto fail;
+		}
+	}
+	return VK_SUCCESS;
+fail:
+	radv_wsi_free_prime_command_buffers(device, swapchain);
+	return result;
+}
+
VkResult radv_CreateSwapchainKHR(
VkDevice _device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
alloc = &device->alloc;
VkResult result = iface->create_swapchain(surface, _device,
&device->physical_device->wsi_device,
+ device->physical_device->local_fd,
pCreateInfo,
alloc, &radv_wsi_image_fns,
&swapchain);
for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++)
swapchain->fences[i] = VK_NULL_HANDLE;
+ if (swapchain->needs_linear_copy) {
+ result = radv_wsi_create_prime_command_buffers(device, alloc,
+ swapchain);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
*pSwapchain = wsi_swapchain_to_handle(swapchain);
return VK_SUCCESS;
radv_DestroyFence(_device, swapchain->fences[i], pAllocator);
}
+ if (swapchain->needs_linear_copy)
+ radv_wsi_free_prime_command_buffers(device, swapchain);
+
swapchain->destroy(swapchain, alloc);
}
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
RADV_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
-
+ struct radeon_winsys_cs *cs;
assert(radv_device_from_handle(swapchain->device) == queue->device);
if (swapchain->fences[0] == VK_NULL_HANDLE) {
result = radv_CreateFence(radv_device_to_handle(queue->device),
1, &swapchain->fences[0]);
}
+ if (swapchain->needs_linear_copy) {
+ int idx = (queue->queue_family_index * swapchain->image_count) + pPresentInfo->pImageIndices[i];
+ cs = radv_cmd_buffer_from_handle(swapchain->cmd_buffers[idx])->cs;
+ } else
+ cs = queue->device->empty_cs[queue->queue_family_index];
RADV_FROM_HANDLE(radv_fence, fence, swapchain->fences[0]);
struct radeon_winsys_fence *base_fence = fence->fence;
struct radeon_winsys_ctx *ctx = queue->hw_ctx;
queue->device->ws->cs_submit(ctx, queue->queue_idx,
- &queue->device->empty_cs[queue->queue_family_index],
+ &cs,
1, NULL, NULL,
(struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores,
pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence);
&device->wsi_device,
&device->instance->alloc,
queueFamilyIndex,
- device->local_fd,
+ device->local_fd, true,
connection, visual_id);
}
&device->wsi_device,
&device->instance->alloc,
queueFamilyIndex,
- device->local_fd,
+ device->local_fd, true,
XGetXCBConnection(dpy), visualID);
}
return iface->get_support(surface, &device->wsi_device,
&device->instance->alloc,
- queueFamilyIndex, device->local_fd, pSupported);
+ queueFamilyIndex, device->local_fd, false, pSupported);
}
VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
x11_anv_wsi_image_create(VkDevice device_h,
const VkSwapchainCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks* pAllocator,
+ bool different_gpu,
+ bool linear,
VkImage *image_p,
VkDeviceMemory *memory_p,
uint32_t *size,
alloc = &device->alloc;
VkResult result = iface->create_swapchain(surface, _device,
&device->instance->physicalDevice.wsi_device,
+ device->instance->physicalDevice.local_fd,
pCreateInfo,
alloc, &anv_wsi_image_fns,
&swapchain);
&device->wsi_device,
&device->instance->alloc,
queueFamilyIndex,
- device->local_fd,
+ device->local_fd, false,
connection, visual_id);
}
&device->wsi_device,
&device->instance->alloc,
queueFamilyIndex,
- device->local_fd,
+ device->local_fd, false,
XGetXCBConnection(dpy), visualID);
}
VkResult (*create_wsi_image)(VkDevice device_h,
const VkSwapchainCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
+ bool needs_linear_copy,
+ bool linear,
VkImage *image_p,
VkDeviceMemory *memory_p,
uint32_t *size_p,
VkAllocationCallbacks alloc;
const struct wsi_image_fns *image_fns;
VkFence fences[3];
+ VkCommandBuffer *cmd_buffers;
+ VkCommandPool cmd_pools[3];
VkPresentModeKHR present_mode;
uint32_t image_count;
+ bool needs_linear_copy;
VkResult (*destroy)(struct wsi_swapchain *swapchain,
const VkAllocationCallbacks *pAllocator);
uint32_t *image_index);
VkResult (*queue_present)(struct wsi_swapchain *swap_chain,
uint32_t image_index);
+ void (*get_image_and_linear)(struct wsi_swapchain *swapchain, int imageIndex, VkImage *image, VkImage *linear_image);
};
struct wsi_interface {
const VkAllocationCallbacks *alloc,
uint32_t queueFamilyIndex,
int local_fd,
+ bool can_handle_different_gpu,
VkBool32* pSupported);
VkResult (*get_capabilities)(VkIcdSurfaceBase *surface,
VkSurfaceCapabilitiesKHR* pSurfaceCapabilities);
VkResult (*create_swapchain)(VkIcdSurfaceBase *surface,
VkDevice device,
struct wsi_device *wsi_device,
+ int local_fd,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
const struct wsi_image_fns *image_fns,
const VkAllocationCallbacks *alloc,
uint32_t queueFamilyIndex,
int local_fd,
+ bool can_handle_different_gpu,
VkBool32* pSupported)
{
*pSupported = true;
result = chain->base.image_fns->create_wsi_image(vk_device,
pCreateInfo,
pAllocator,
+ false,
+ false,
&image->image,
&image->memory,
&size,
wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
VkDevice device,
struct wsi_device *wsi_device,
+ int local_fd,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
const struct wsi_image_fns *image_fns,
chain->base.image_fns = image_fns;
chain->base.present_mode = pCreateInfo->presentMode;
chain->base.image_count = num_images;
+ chain->base.needs_linear_copy = false;
chain->surface = surface->surface;
chain->extent = pCreateInfo->imageExtent;
chain->vk_format = pCreateInfo->imageFormat;
VkAllocationCallbacks *alloc,
uint32_t queueFamilyIndex,
int fd,
+ bool can_handle_different_gpu,
xcb_connection_t* connection,
xcb_visualid_t visual_id)
{
return false;
}
- if (!wsi_x11_check_dri3_compatible(connection, fd))
- return false;
+ if (!can_handle_different_gpu)
+ if (!wsi_x11_check_dri3_compatible(connection, fd))
+ return false;
unsigned visual_depth;
if (!connection_get_visualtype(connection, visual_id, &visual_depth))
const VkAllocationCallbacks *alloc,
uint32_t queueFamilyIndex,
int local_fd,
+ bool can_handle_different_gpu,
VkBool32* pSupported)
{
xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
return VK_SUCCESS;
}
- if (!wsi_x11_check_dri3_compatible(conn, local_fd))
- return false;
+ if (!can_handle_different_gpu)
+ if (!wsi_x11_check_dri3_compatible(conn, local_fd))
+ return false;
unsigned visual_depth;
if (!get_visualtype_for_window(conn, window, &visual_depth)) {
struct x11_image {
VkImage image;
+ VkImage linear_image; // for prime
VkDeviceMemory memory;
+ VkDeviceMemory linear_memory; // for prime
xcb_pixmap_t pixmap;
bool busy;
struct xshmfence * shm_fence;
return result;
}
+/*
+ * wsi_swapchain::get_image_and_linear hook for X11 swapchains: report the
+ * image handle and the linear image handle stored for slot `imageIndex`.
+ *
+ * No bounds check is performed on imageIndex. NOTE(review): linear_image
+ * appears to be created only when needs_linear_copy is set - callers
+ * should not rely on it otherwise; confirm.
+ */
+static void
+x11_get_image_and_linear(struct wsi_swapchain *drv_chain,
+                         int imageIndex, VkImage *image, VkImage *linear_image)
+{
+   struct x11_swapchain *x11_chain = (struct x11_swapchain *)drv_chain;
+   struct x11_image *slot = &x11_chain->images[imageIndex];
+
+   *linear_image = slot->linear_image;
+   *image = slot->image;
+}
+
static VkResult
x11_handle_dri3_present_event(struct x11_swapchain *chain,
xcb_present_generic_event_t *event)
result = chain->base.image_fns->create_wsi_image(device_h,
pCreateInfo,
pAllocator,
+ chain->base.needs_linear_copy,
+ false,
&image->image,
&image->memory,
&size,
if (result != VK_SUCCESS)
return result;
+ if (chain->base.needs_linear_copy) {
+ result = chain->base.image_fns->create_wsi_image(device_h,
+ pCreateInfo,
+ pAllocator,
+ chain->base.needs_linear_copy,
+ true,
+ &image->linear_image,
+ &image->linear_memory,
+ &size,
+ &offset,
+ &row_pitch,
+ &fd);
+ if (result != VK_SUCCESS) {
+ chain->base.image_fns->free_wsi_image(device_h, pAllocator,
+ image->image, image->memory);
+ return result;
+ }
+ }
+
image->pixmap = xcb_generate_id(chain->conn);
cookie =
cookie = xcb_free_pixmap(chain->conn, image->pixmap);
xcb_discard_reply(chain->conn, cookie.sequence);
+ if (chain->base.needs_linear_copy) {
+ chain->base.image_fns->free_wsi_image(device_h, pAllocator,
+ image->linear_image, image->linear_memory);
+ }
chain->base.image_fns->free_wsi_image(device_h, pAllocator,
- image->image, image->memory);
+ image->image, image->memory);
return result;
}
cookie = xcb_free_pixmap(chain->conn, image->pixmap);
xcb_discard_reply(chain->conn, cookie.sequence);
+ if (chain->base.needs_linear_copy) {
+ chain->base.image_fns->free_wsi_image(chain->base.device, pAllocator,
+ image->linear_image, image->linear_memory);
+ }
chain->base.image_fns->free_wsi_image(chain->base.device, pAllocator,
image->image, image->memory);
}
x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
VkDevice device,
struct wsi_device *wsi_device,
+ int local_fd,
const VkSwapchainCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks* pAllocator,
const struct wsi_image_fns *image_fns,
chain->base.device = device;
chain->base.destroy = x11_swapchain_destroy;
chain->base.get_images = x11_get_images;
+ chain->base.get_image_and_linear = x11_get_image_and_linear;
chain->base.acquire_next_image = x11_acquire_next_image;
chain->base.queue_present = x11_queue_present;
chain->base.image_fns = image_fns;
free(geometry);
+ chain->base.needs_linear_copy = false;
+ if (!wsi_x11_check_dri3_compatible(conn, local_fd))
+ chain->base.needs_linear_copy = true;
+
chain->event_id = xcb_generate_id(chain->conn);
xcb_present_select_input(chain->conn, chain->event_id, chain->window,
XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY |
VkAllocationCallbacks *alloc,
uint32_t queueFamilyIndex,
int local_fd,
+ bool can_handle_different_gpu,
xcb_connection_t* connection,
xcb_visualid_t visual_id);