radv: introduce perf test env var and allow to enable chaining
author Dave Airlie <airlied@redhat.com>
Tue, 9 May 2017 03:17:30 +0000 (04:17 +0100)
committer Dave Airlie <airlied@redhat.com>
Fri, 9 Jun 2017 01:15:25 +0000 (02:15 +0100)
We have some features that seem to slow things down or cause other
possibly undesirable side effects, but it would be nice to be able to
test games etc. with them easily.

I foresee multisample DCC and maybe some shader optimization changes using this.

For now, use it for batch chaining (enabled with RADV_PERFTEST=batchchain).

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/vulkan/radv_debug.h
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h

index 4d1398e191d43283497f1fb380c8a14c9a2172e2..c986c7bc9d7b8883e126bce6e2a1b0d84c04913a 100644 (file)
@@ -37,4 +37,7 @@ enum {
        RADV_DEBUG_NO_IBS            = 0x200,
 };
 
+enum {
+       RADV_PERFTEST_BATCHCHAIN     =   0x1,
+};
 #endif
index 5fdb894146cb1d3347248765ea20c7ccaba391dd..1ea69608a14b259796c5b74ee75dcc8b6701b366 100644 (file)
@@ -270,7 +270,8 @@ radv_physical_device_init(struct radv_physical_device *device,
        assert(strlen(path) < ARRAY_SIZE(device->path));
        strncpy(device->path, path, ARRAY_SIZE(device->path));
 
-       device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
+       device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
+                                              instance->perftest_flags);
        if (!device->ws) {
                result = VK_ERROR_INCOMPATIBLE_DRIVER;
                goto fail;
@@ -367,6 +368,11 @@ static const struct debug_control radv_debug_options[] = {
        {NULL, 0}
 };
 
+static const struct debug_control radv_perftest_options[] = {
+       {"batchchain", RADV_PERFTEST_BATCHCHAIN},
+       {NULL, 0}
+};
+
 VkResult radv_CreateInstance(
        const VkInstanceCreateInfo*                 pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
@@ -424,6 +430,9 @@ VkResult radv_CreateInstance(
        instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
                                                   radv_debug_options);
 
+       instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
+                                                  radv_perftest_options);
+
        *pInstance = radv_instance_to_handle(instance);
 
        return VK_SUCCESS;
index c21b17e8be437a78e30748a596045e3b2817741e..87cb0a67fe70c69e946beb264382ed3da01cbb43 100644 (file)
@@ -288,6 +288,7 @@ struct radv_instance {
        struct radv_physical_device                 physicalDevices[RADV_MAX_DRM_DEVICES];
 
        uint64_t debug_flags;
+       uint64_t perftest_flags;
 };
 
 VkResult radv_init_wsi(struct radv_physical_device *physical_device);
index 5ea12188e879f7a0e7e873587ac01f77a6ead63f..7b749700d1cc3c0c3f20a7b5bfb056286d217b8a 100644 (file)
@@ -931,7 +931,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
        if (!cs->ws->use_ib_bos) {
                ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
                                                           cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
-       } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
+       } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
                ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
                                                            cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
        } else {
index bce8dd2cceb8a982bb95592932f0f28e39212811..c7688cf4c9bda5aeb8d5942ac06bfa6b02017859 100644 (file)
@@ -82,7 +82,7 @@ static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
 }
 
 struct radeon_winsys *
-radv_amdgpu_winsys_create(int fd, uint32_t debug_flags)
+radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags)
 {
        uint32_t drm_major, drm_minor, r;
        amdgpu_device_handle dev;
@@ -106,6 +106,7 @@ radv_amdgpu_winsys_create(int fd, uint32_t debug_flags)
        if (debug_flags & RADV_DEBUG_NO_IBS)
                ws->use_ib_bos = false;
 
+       ws->batchchain = !!(perftest_flags & RADV_PERFTEST_BATCHCHAIN);
        LIST_INITHEAD(&ws->global_bo_list);
        pthread_mutex_init(&ws->global_bo_list_lock, NULL);
        ws->base.query_info = radv_amdgpu_winsys_query_info;
index 59e2730b1242c9a7c049f2f98906ca386fd0480a..426cf692ec064a8e11b5c4bfe0034a7284868059 100644 (file)
@@ -43,6 +43,7 @@ struct radv_amdgpu_winsys {
        ADDR_HANDLE addrlib;
 
        bool debug_all_bos;
+       bool batchchain;
        pthread_mutex_t global_bo_list_lock;
        struct list_head global_bo_list;
        unsigned num_buffers;
index d5d0ff52c215912a64baacc09e5a28710e5ab5ea..854e216551f487aba1b3128a2f86f5122b52ef6d 100644 (file)
@@ -29,6 +29,7 @@
 #ifndef RADV_AMDGPU_WINSYS_PUBLIC_H
 #define RADV_AMDGPU_WINSYS_PUBLIC_H
 
-struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint32_t debug_flags);
+struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags,
+                                               uint64_t perftest_flags);
 
 #endif /* RADV_AMDGPU_WINSYS_PUBLIC_H */