radv: add initial SQ Thread Trace support for GFX9
[mesa.git] / src / amd / vulkan / radv_device.c
index de98dd867194369b5f8b516d3f4f5b407fa29b41..04fdb462a92c6fb270b734b31fed09e4b4ce49fe 100644 (file)
@@ -53,6 +53,7 @@
 #include <amdgpu.h>
 #include <amdgpu_drm.h>
 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
+#include "winsys/null/radv_null_winsys_public.h"
 #include "ac_llvm_util.h"
 #include "vk_format.h"
 #include "sid.h"
@@ -287,97 +288,69 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
        }
 }
 
-static void
-radv_handle_env_var_force_family(struct radv_physical_device *device)
-{
-       const char *family = getenv("RADV_FORCE_FAMILY");
-       unsigned i;
-
-       if (!family)
-               return;
-
-       for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
-               if (!strcmp(family, ac_get_llvm_processor_name(i))) {
-                       /* Override family and chip_class. */
-                       device->rad_info.family = i;
-                       device->rad_info.name = "OVERRIDDEN";
-
-                       if (i >= CHIP_NAVI10)
-                               device->rad_info.chip_class = GFX10;
-                       else if (i >= CHIP_VEGA10)
-                               device->rad_info.chip_class = GFX9;
-                       else if (i >= CHIP_TONGA)
-                               device->rad_info.chip_class = GFX8;
-                       else if (i >= CHIP_BONAIRE)
-                               device->rad_info.chip_class = GFX7;
-                       else
-                               device->rad_info.chip_class = GFX6;
-
-                       /* Don't submit any IBs. */
-                       device->instance->debug_flags |= RADV_DEBUG_NOOP;
-                       return;
-               }
-       }
-
-       fprintf(stderr, "radv: Unknown family: %s\n", family);
-       exit(1);
-}
-
 static VkResult
 radv_physical_device_init(struct radv_physical_device *device,
                          struct radv_instance *instance,
                          drmDevicePtr drm_device)
 {
-       const char *path = drm_device->nodes[DRM_NODE_RENDER];
        VkResult result;
-       drmVersionPtr version;
-       int fd;
+       int fd = -1;
        int master_fd = -1;
 
-       fd = open(path, O_RDWR | O_CLOEXEC);
-       if (fd < 0) {
-               if (instance->debug_flags & RADV_DEBUG_STARTUP)
-                       radv_logi("Could not open device '%s'", path);
+       if (drm_device) {
+               const char *path = drm_device->nodes[DRM_NODE_RENDER];
+               drmVersionPtr version;
 
-               return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
-       }
+               fd = open(path, O_RDWR | O_CLOEXEC);
+               if (fd < 0) {
+                       if (instance->debug_flags & RADV_DEBUG_STARTUP)
+                               radv_logi("Could not open device '%s'", path);
 
-       version = drmGetVersion(fd);
-       if (!version) {
-               close(fd);
+                       return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
+               }
 
-               if (instance->debug_flags & RADV_DEBUG_STARTUP)
-                       radv_logi("Could not get the kernel driver version for device '%s'", path);
+               version = drmGetVersion(fd);
+               if (!version) {
+                       close(fd);
 
-               return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
-                                "failed to get version %s: %m", path);
-       }
+                       if (instance->debug_flags & RADV_DEBUG_STARTUP)
+                               radv_logi("Could not get the kernel driver version for device '%s'", path);
+
+                       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
+                                        "failed to get version %s: %m", path);
+               }
 
-       if (strcmp(version->name, "amdgpu")) {
+               if (strcmp(version->name, "amdgpu")) {
+                       drmFreeVersion(version);
+                       close(fd);
+
+                       if (instance->debug_flags & RADV_DEBUG_STARTUP)
+                               radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);
+
+                       return VK_ERROR_INCOMPATIBLE_DRIVER;
+               }
                drmFreeVersion(version);
-               close(fd);
 
                if (instance->debug_flags & RADV_DEBUG_STARTUP)
-                       radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);
-
-               return VK_ERROR_INCOMPATIBLE_DRIVER;
+                               radv_logi("Found compatible device '%s'.", path);
        }
-       drmFreeVersion(version);
-
-       if (instance->debug_flags & RADV_DEBUG_STARTUP)
-                       radv_logi("Found compatible device '%s'.", path);
 
        device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
        device->instance = instance;
 
-       device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
-                                              instance->perftest_flags);
+       if (drm_device) {
+               device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
+                                                      instance->perftest_flags);
+       } else {
+               device->ws = radv_null_winsys_create();
+       }
+
        if (!device->ws) {
                result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
                goto fail;
        }
 
-       if (instance->enabled_extensions.KHR_display) {
+       if (drm_device && instance->enabled_extensions.KHR_display) {
                master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
                if (master_fd >= 0) {
                        uint32_t accel_working = 0;
@@ -398,8 +371,6 @@ radv_physical_device_init(struct radv_physical_device *device,
        device->local_fd = fd;
        device->ws->query_info(device->ws, &device->rad_info);
 
-       radv_handle_env_var_force_family(device);
-
        device->use_aco = instance->perftest_flags & RADV_PERFTEST_ACO;
 
        snprintf(device->name, sizeof(device->name),
@@ -414,9 +385,7 @@ radv_physical_device_init(struct radv_physical_device *device,
        }
 
        /* These flags affect shader compilation. */
-       uint64_t shader_env_flags =
-               (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
-               (device->use_aco ? 0x2 : 0);
+       uint64_t shader_env_flags = (device->use_aco ? 0x2 : 0);
 
        /* The gpu id is already embedded in the uuid so we just pass "radv"
         * when creating the cache.
@@ -470,7 +439,8 @@ radv_physical_device_init(struct radv_physical_device *device,
        radv_physical_device_init_mem_types(device);
        radv_fill_device_extension_table(device, &device->supported_extensions);
 
-       device->bus_info = *drm_device->businfo.pci;
+       if (drm_device)
+               device->bus_info = *drm_device->businfo.pci;
 
        if ((device->instance->debug_flags & RADV_DEBUG_INFO))
                ac_print_gpu_info(&device->rad_info);
@@ -547,7 +517,6 @@ static const struct debug_control radv_debug_options[] = {
        {"vmfaults", RADV_DEBUG_VM_FAULTS},
        {"zerovram", RADV_DEBUG_ZERO_VRAM},
        {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
-       {"nosisched", RADV_DEBUG_NO_SISCHED},
        {"preoptir", RADV_DEBUG_PREOPTIR},
        {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
        {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
@@ -563,7 +532,6 @@ static const struct debug_control radv_debug_options[] = {
        {"allentrypoints", RADV_DEBUG_ALL_ENTRYPOINTS},
        {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
        {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
-       {"noop", RADV_DEBUG_NOOP},
        {NULL, 0}
 };
 
@@ -575,8 +543,6 @@ radv_get_debug_option_name(int id)
 }
 
 static const struct debug_control radv_perftest_options[] = {
-       {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
-       {"sisched", RADV_PERFTEST_SISCHED},
        {"localbos", RADV_PERFTEST_LOCAL_BOS},
        {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
        {"bolist", RADV_PERFTEST_BO_LIST},
@@ -606,15 +572,7 @@ radv_handle_per_app_options(struct radv_instance *instance,
        if (!name)
                return;
 
-       if (!strcmp(name, "Talos - Linux - 32bit") ||
-           !strcmp(name, "Talos - Linux - 64bit")) {
-               if (!(instance->debug_flags & RADV_DEBUG_NO_SISCHED)) {
-                       /* Force enable LLVM sisched for Talos because it looks
-                        * safe and it gives few more FPS.
-                        */
-                       instance->perftest_flags |= RADV_PERFTEST_SISCHED;
-               }
-       } else if (!strcmp(name, "DOOM_VFR")) {
+       if (!strcmp(name, "DOOM_VFR")) {
                /* Work around a Doom VFR game bug */
                instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
        } else if (!strcmp(name, "MonsterHunterWorld.exe")) {
@@ -809,6 +767,19 @@ radv_enumerate_devices(struct radv_instance *instance)
 
        instance->physicalDeviceCount = 0;
 
+       if (getenv("RADV_FORCE_FAMILY")) {
+               /* When RADV_FORCE_FAMILY is set, the driver creates a null
+                * device that allows testing the compiler without having an
+                * AMDGPU instance.
+                */
+               result = radv_physical_device_init(instance->physicalDevices +
+                                                  instance->physicalDeviceCount,
+                                                  instance, NULL);
+
+               ++instance->physicalDeviceCount;
+               return VK_SUCCESS;
+       }
+
        max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
 
        if (instance->debug_flags & RADV_DEBUG_STARTUP)
@@ -931,7 +902,7 @@ void radv_GetPhysicalDeviceFeatures(
                .shaderTessellationAndGeometryPointSize   = true,
                .shaderImageGatherExtended                = true,
                .shaderStorageImageExtendedFormats        = true,
-               .shaderStorageImageMultisample            = pdevice->rad_info.chip_class >= GFX8,
+               .shaderStorageImageMultisample            = true,
                .shaderUniformBufferArrayDynamicIndexing  = true,
                .shaderSampledImageArrayDynamicIndexing   = true,
                .shaderStorageBufferArrayDynamicIndexing  = true,
@@ -1251,7 +1222,7 @@ void radv_GetPhysicalDeviceFeatures2(
                        features->descriptorBindingPartiallyBound = true;
                        features->descriptorBindingVariableDescriptorCount = true;
                        features->runtimeDescriptorArray = true;
-                       features->samplerFilterMinmax = pdevice->rad_info.chip_class >= GFX7;
+                       features->samplerFilterMinmax = true;
                        features->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
                        features->imagelessFramebuffer = true;
                        features->uniformBufferStandardLayout = true;
@@ -1270,6 +1241,17 @@ void radv_GetPhysicalDeviceFeatures2(
                        features->subgroupBroadcastDynamicId = true;
                        break;
                }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
+                       VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
+                               (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
+                       features->rectangularLines = false;
+                       features->bresenhamLines = true;
+                       features->smoothLines = false;
+                       features->stippledRectangularLines = false;
+                       features->stippledBresenhamLines = true;
+                       features->stippledSmoothLines = false;
+                       break;
+               }
                default:
                        break;
                }
@@ -1398,7 +1380,7 @@ void radv_GetPhysicalDeviceProperties(
                .sampledImageIntegerSampleCounts          = sample_counts,
                .sampledImageDepthSampleCounts            = sample_counts,
                .sampledImageStencilSampleCounts          = sample_counts,
-               .storageImageSampleCounts                 = pdevice->rad_info.chip_class >= GFX8 ? sample_counts : VK_SAMPLE_COUNT_1_BIT,
+               .storageImageSampleCounts                 = sample_counts,
                .maxSampleMaskWords                       = 1,
                .timestampComputeAndGraphics              = true,
                .timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
@@ -1407,9 +1389,9 @@ void radv_GetPhysicalDeviceProperties(
                .maxCombinedClipAndCullDistances          = 8,
                .discreteQueuePriorities                  = 2,
                .pointSizeRange                           = { 0.0, 8192.0 },
-               .lineWidthRange                           = { 0.0, 7.9921875 },
+               .lineWidthRange                           = { 0.0, 8192.0 },
                .pointSizeGranularity                     = (1.0 / 8.0),
-               .lineWidthGranularity                     = (1.0 / 128.0),
+               .lineWidthGranularity                     = (1.0 / 8.0),
                .strictLines                              = false, /* FINISHME */
                .standardSampleLocations                  = true,
                .optimalBufferCopyOffsetAlignment         = 128,
@@ -1674,32 +1656,31 @@ void radv_GetPhysicalDeviceProperties2(
                                pdevice->rad_info.max_sh_per_se;
                        properties->computeUnitsPerShaderArray =
                                pdevice->rad_info.num_good_cu_per_sh;
-                       properties->simdPerComputeUnit = 4;
+                       properties->simdPerComputeUnit =
+                               pdevice->rad_info.num_simd_per_compute_unit;
                        properties->wavefrontsPerSimd =
-                               pdevice->rad_info.family == CHIP_TONGA ||
-                               pdevice->rad_info.family == CHIP_ICELAND ||
-                               pdevice->rad_info.family == CHIP_POLARIS10 ||
-                               pdevice->rad_info.family == CHIP_POLARIS11 ||
-                               pdevice->rad_info.family == CHIP_POLARIS12 ||
-                               pdevice->rad_info.family == CHIP_VEGAM ? 8 : 10;
+                               pdevice->rad_info.max_wave64_per_simd;
                        properties->wavefrontSize = 64;
 
                        /* SGPR. */
                        properties->sgprsPerSimd =
                                pdevice->rad_info.num_physical_sgprs_per_simd;
                        properties->minSgprAllocation =
-                               pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;
+                               pdevice->rad_info.min_sgpr_alloc;
                        properties->maxSgprAllocation =
-                               pdevice->rad_info.family == CHIP_TONGA ||
-                               pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104;
+                               pdevice->rad_info.max_sgpr_alloc;
                        properties->sgprAllocationGranularity =
-                               pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;
+                               pdevice->rad_info.sgpr_alloc_granularity;
 
                        /* VGPR. */
-                       properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS;
-                       properties->minVgprAllocation = 4;
-                       properties->maxVgprAllocation = 256;
-                       properties->vgprAllocationGranularity = 4;
+                       properties->vgprsPerSimd =
+                               pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
+                       properties->minVgprAllocation =
+                               pdevice->rad_info.min_vgpr_alloc;
+                       properties->maxVgprAllocation =
+                               pdevice->rad_info.max_vgpr_alloc;
+                       properties->vgprAllocationGranularity =
+                               pdevice->rad_info.vgpr_alloc_granularity;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
@@ -1889,6 +1870,12 @@ void radv_GetPhysicalDeviceProperties2(
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES:
                        radv_get_physical_device_properties_1_2(pdevice, (void *)ext);
                        break;
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
+                       VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
+                               (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
+                       props->lineSubPixelPrecisionBits = 4;
+                       break;
+               }
                default:
                        break;
                }
@@ -3895,6 +3882,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
                if (gds_oa_bo)
                        radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);
 
+               if (queue->device->trace_bo)
+                       radv_cs_add_buffer(queue->device->ws, cs, queue->device->trace_bo);
+
                if (i == 0) {
                        si_cs_emit_cache_flush(cs,
                                               queue->device->physical_device->rad_info.chip_class,
@@ -4662,6 +4652,25 @@ static VkResult radv_queue_submit(struct radv_queue *queue,
        return radv_process_submissions(&processing_list);
 }
 
+bool
+radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
+{ /* Submit the command stream `cs` on `queue`; true when cs_submit returns 0. */
+       struct radeon_winsys_ctx *ctx = queue->hw_ctx;
+       struct radv_winsys_sem_info sem_info;
+       VkResult result;
+       int ret;
+
+       result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0,
+                                    0, NULL, NULL); /* all-zero/NULL args: presumably no semaphores to wait on or signal -- confirm */
+       if (result != VK_SUCCESS)
+               return false; /* sem_info setup failed; nothing was submitted */
+
+       ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1, NULL,
+                                          NULL, &sem_info, NULL, false, NULL); /* `&cs, 1`: presumably (array, count) for one command stream -- confirm */
+       radv_free_sem_info(&sem_info); /* freed whether or not the submit succeeded */
+       return !ret; /* any nonzero cs_submit result is reported as false */
+}
+
 /* Signals fence as soon as all the work currently put on queue is done. */
 static VkResult radv_signal_fence(struct radv_queue *queue,
                               VkFence fence)