radv: change base alignment for allocated memory.
[mesa.git] / src / amd / vulkan / radv_device.c
index 0026de5a0e09a8bef090b1afb09b2aa8c3f6e99b..9bc44b84a2ab82f88d8602271da98df2717f9a0f 100644 (file)
@@ -124,6 +124,10 @@ static const VkExtensionProperties common_device_extensions[] = {
                .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
                .specVersion = 1,
        },
+       {
+               .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
+               .specVersion = 1,
+       },
 };
 
 static VkResult
@@ -290,7 +294,7 @@ static const VkAllocationCallbacks default_alloc = {
 };
 
 static const struct debug_control radv_debug_options[] = {
-       {"fastclears", RADV_DEBUG_FAST_CLEARS},
+       {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
        {"nodcc", RADV_DEBUG_NO_DCC},
        {"shaders", RADV_DEBUG_DUMP_SHADERS},
        {"nocache", RADV_DEBUG_NO_CACHE},
@@ -619,12 +623,11 @@ void radv_GetPhysicalDeviceProperties2KHR(
        return radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
 }
 
-void radv_GetPhysicalDeviceQueueFamilyProperties(
-       VkPhysicalDevice                            physicalDevice,
+static void radv_get_physical_device_queue_family_properties(
+       struct radv_physical_device*                pdevice,
        uint32_t*                                   pCount,
-       VkQueueFamilyProperties*                    pQueueFamilyProperties)
+       VkQueueFamilyProperties**                    pQueueFamilyProperties)
 {
-       RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
        int num_queue_families = 1;
        int idx;
        if (pdevice->rad_info.compute_rings > 0 &&
@@ -642,7 +645,7 @@ void radv_GetPhysicalDeviceQueueFamilyProperties(
 
        idx = 0;
        if (*pCount >= 1) {
-               pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
+               *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
                        .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                        VK_QUEUE_COMPUTE_BIT |
                        VK_QUEUE_TRANSFER_BIT,
@@ -657,7 +660,7 @@ void radv_GetPhysicalDeviceQueueFamilyProperties(
            pdevice->rad_info.chip_class >= CIK &&
            !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
                if (*pCount > idx) {
-                       pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
+                       *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
                                .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
                                .queueCount = pdevice->rad_info.compute_rings,
                                .timestampValidBits = 64,
@@ -669,14 +672,42 @@ void radv_GetPhysicalDeviceQueueFamilyProperties(
        *pCount = idx;
 }
 
+void radv_GetPhysicalDeviceQueueFamilyProperties(
+       VkPhysicalDevice                            physicalDevice,
+       uint32_t*                                   pCount,
+       VkQueueFamilyProperties*                    pQueueFamilyProperties)
+{
+       RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+       if (!pQueueFamilyProperties) {
+               return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
+               return;
+       }
+       VkQueueFamilyProperties *properties[] = {
+               pQueueFamilyProperties + 0,
+               pQueueFamilyProperties + 1,
+               pQueueFamilyProperties + 2,
+       };
+       radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
+       assert(*pCount <= 3);
+}
+
 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
        VkPhysicalDevice                            physicalDevice,
        uint32_t*                                   pCount,
        VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
 {
-       return radv_GetPhysicalDeviceQueueFamilyProperties(physicalDevice,
-                                                          pCount,
-                                                          &pQueueFamilyProperties->queueFamilyProperties);
+       RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+       if (!pQueueFamilyProperties) {
+               return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
+               return;
+       }
+       VkQueueFamilyProperties *properties[] = {
+               &pQueueFamilyProperties[0].queueFamilyProperties,
+               &pQueueFamilyProperties[1].queueFamilyProperties,
+               &pQueueFamilyProperties[2].queueFamilyProperties,
+       };
+       radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
+       assert(*pCount <= 3);
 }
 
 void radv_GetPhysicalDeviceMemoryProperties(
@@ -918,6 +949,10 @@ VkResult radv_CreateDevice(
                        goto fail;
        }
 
+       /* temporarily disabled on CIK */
+       if (device->physical_device->rad_info.chip_class > CIK)
+               cik_create_gfx_config(device);
+
        *pDevice = radv_device_to_handle(device);
        return VK_SUCCESS;
 
@@ -925,6 +960,9 @@ fail:
        if (device->trace_bo)
                device->ws->buffer_destroy(device->trace_bo);
 
+       if (device->gfx_init)
+               device->ws->buffer_destroy(device->gfx_init);
+
        for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
                for (unsigned q = 0; q < device->queue_count[i]; q++)
                        radv_queue_finish(&device->queues[i][q]);
@@ -945,6 +983,9 @@ void radv_DestroyDevice(
        if (device->trace_bo)
                device->ws->buffer_destroy(device->trace_bo);
 
+       if (device->gfx_init)
+               device->ws->buffer_destroy(device->gfx_init);
+
        for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
                for (unsigned q = 0; q < device->queue_count[i]; q++)
                        radv_queue_finish(&device->queues[i][q]);
@@ -1396,6 +1437,7 @@ VkResult radv_QueueSubmit(
        uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
        struct radeon_winsys_cs *preamble_cs = NULL;
        VkResult result;
+       bool fence_emitted = false;
 
        /* Do this first so failing to allocate scratch buffers can't result in
         * partially executed submissions. */
@@ -1420,28 +1462,57 @@ VkResult radv_QueueSubmit(
                struct radeon_winsys_cs **cs_array;
                bool can_patch = true;
                uint32_t advance;
+               int draw_cmd_buffers_count = 0;
 
-               if (!pSubmits[i].commandBufferCount)
+               for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
+                       RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
+                                        pSubmits[i].pCommandBuffers[j]);
+                       assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+                       if (cmd_buffer->no_draws == true)
+                               continue;
+                       draw_cmd_buffers_count++;
+               }
+
+               if (!draw_cmd_buffers_count) {
+                       if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
+                               ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
+                                                                  &queue->device->empty_cs[queue->queue_family_index],
+                                                                  1, NULL,
+                                                                  (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
+                                                                  pSubmits[i].waitSemaphoreCount,
+                                                                  (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
+                                                                  pSubmits[i].signalSemaphoreCount,
+                                                                  false, base_fence);
+                               if (ret) {
+                                       radv_loge("failed to submit CS %d\n", i);
+                                       abort();
+                               }
+                               fence_emitted = true;
+                       }
                        continue;
+               }
 
-               cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
-                                               pSubmits[i].commandBufferCount);
+               cs_array = malloc(sizeof(struct radeon_winsys_cs *) * draw_cmd_buffers_count);
 
+               int draw_cmd_buffer_idx = 0;
                for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
                        RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
                                         pSubmits[i].pCommandBuffers[j]);
                        assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+                       if (cmd_buffer->no_draws == true)
+                               continue;
 
-                       cs_array[j] = cmd_buffer->cs;
+                       cs_array[draw_cmd_buffer_idx] = cmd_buffer->cs;
+                       draw_cmd_buffer_idx++;
                        if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
                                can_patch = false;
                }
 
-               for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
+               for (uint32_t j = 0; j < draw_cmd_buffers_count; j += advance) {
                        advance = MIN2(max_cs_submission,
-                                      pSubmits[i].commandBufferCount - j);
+                                      draw_cmd_buffers_count - j);
                        bool b = j == 0;
-                       bool e = j + advance == pSubmits[i].commandBufferCount;
+                       bool e = j + advance == draw_cmd_buffers_count;
 
                        if (queue->device->trace_bo)
                                *queue->device->trace_id_ptr = 0;
@@ -1458,6 +1529,7 @@ VkResult radv_QueueSubmit(
                                radv_loge("failed to submit CS %d\n", i);
                                abort();
                        }
+                       fence_emitted = true;
                        if (queue->device->trace_bo) {
                                bool success = queue->device->ws->ctx_wait_idle(
                                                        queue->hw_ctx,
@@ -1475,7 +1547,7 @@ VkResult radv_QueueSubmit(
        }
 
        if (fence) {
-               if (!submitCount)
+               if (!fence_emitted)
                        ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
                                                           &queue->device->empty_cs[queue->queue_family_index],
                                                           1, NULL, NULL, 0, NULL, 0,
@@ -1580,7 +1652,7 @@ VkResult radv_AllocateMemory(
        if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
                flags |= RADEON_FLAG_GTT_WC;
 
-       mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
+       mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
                                               domain, flags);
 
        if (!mem->bo) {
@@ -2153,7 +2225,7 @@ radv_initialise_color_surface(struct radv_device *device,
                        cb->cb_color_info |= S_028C70_COMPRESSION(1);
 
        if (iview->image->cmask.size &&
-           (device->debug_flags & RADV_DEBUG_FAST_CLEARS))
+           !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
                cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
 
        if (iview->image->surface.dcc_size && level_info->dcc_enabled)