radv: flush more stages when waiting on semaphores.
author Dave Airlie <airlied@redhat.com>
Tue, 2 May 2017 21:13:21 +0000 (07:13 +1000)
committer Dave Airlie <airlied@redhat.com>
Tue, 2 May 2017 21:21:31 +0000 (07:21 +1000)
This still doesn't give us complete pWaitDstStageMask support,
but it should be enough to be correct, if not as efficient as
possible.

If we have wait semaphores we must flush between submits and
flush the shaders as well.

This fixes the remaining failures in:
dEQP-VK.synchronization.op.single_queue.semaphore.*ssbo*

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_private.h

index 1301801ef471a42e42aad05158e35b82e8b6cd30..408c0347856d8e479b9916559be933cc4087f048 100644 (file)
@@ -1046,6 +1046,22 @@ VkResult radv_CreateDevice(
                        break;
                }
                device->ws->cs_finalize(device->flush_cs[family]);
+
+               device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
+               switch (family) {
+               case RADV_QUEUE_GENERAL:
+               case RADV_QUEUE_COMPUTE:
+                       si_cs_emit_cache_flush(device->flush_shader_cs[family],
+                                              device->physical_device->rad_info.chip_class,
+                                              family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
+                                              family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
+                                              RADV_CMD_FLAG_INV_ICACHE |
+                                              RADV_CMD_FLAG_INV_SMEM_L1 |
+                                              RADV_CMD_FLAG_INV_VMEM_L1 |
+                                              RADV_CMD_FLAG_INV_GLOBAL_L2);
+                       break;
+               }
+               device->ws->cs_finalize(device->flush_shader_cs[family]);
        }
 
        if (getenv("RADV_TRACE_FILE")) {
@@ -1121,6 +1137,8 @@ void radv_DestroyDevice(
                        device->ws->cs_destroy(device->empty_cs[i]);
                if (device->flush_cs[i])
                        device->ws->cs_destroy(device->flush_cs[i]);
+               if (device->flush_shader_cs[i])
+                       device->ws->cs_destroy(device->flush_shader_cs[i]);
        }
        radv_device_finish_meta(device);
 
@@ -1822,7 +1840,7 @@ VkResult radv_QueueSubmit(
 
        for (uint32_t i = 0; i < submitCount; i++) {
                struct radeon_winsys_cs **cs_array;
-               bool do_flush = !i;
+               bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
                bool can_patch = !do_flush;
                uint32_t advance;
 
@@ -1849,7 +1867,9 @@ VkResult radv_QueueSubmit(
                                                (pSubmits[i].commandBufferCount + do_flush));
 
                if(do_flush)
-                       cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
+                       cs_array[0] = pSubmits[i].waitSemaphoreCount ?
+                               queue->device->flush_shader_cs[queue->queue_family_index] :
+                               queue->device->flush_cs[queue->queue_family_index];
 
                for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
                        RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
index 5028bf507b3ee45ea08e58f93e13d900b3051ceb..1fe54bb8a07052afb49f9ffa5f68ac17854021bf 100644 (file)
@@ -495,7 +495,7 @@ struct radv_device {
        int queue_count[RADV_MAX_QUEUE_FAMILIES];
        struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
        struct radeon_winsys_cs *flush_cs[RADV_MAX_QUEUE_FAMILIES];
-
+       struct radeon_winsys_cs *flush_shader_cs[RADV_MAX_QUEUE_FAMILIES];
        uint64_t debug_flags;
 
        bool llvm_supports_spill;