turnip: enable sampleRateShading feature
[mesa.git] / src / freedreno / vulkan / tu_device.c
index 540c033ceb3054ecaa2ca6ee8d1e7d467e1cab73..82c1cb686bec5e800b041826ea43a047570eaa8a 100644 (file)
@@ -39,7 +39,6 @@
 #include "compiler/glsl_types.h"
 #include "util/debug.h"
 #include "util/disk_cache.h"
-#include "util/strtod.h"
 #include "vk_format.h"
 #include "vk_util.h"
 
@@ -257,9 +256,22 @@ tu_physical_device_init(struct tu_physical_device *device,
    sprintf(device->name, "FD%d", device->gpu_id);
 
    switch (device->gpu_id) {
+   case 618:
+      device->tile_align_w = 64;
+      device->tile_align_h = 16;
+      device->magic.RB_UNKNOWN_8E04_blit = 0x00100000;
+      device->magic.RB_CCU_CNTL_gmem     = 0x3e400004;
+      device->magic.PC_UNKNOWN_9805 = 0x0;
+      device->magic.SP_UNKNOWN_A0F8 = 0x0;
+      break;
    case 630:
-      device->tile_align_w = 32;
-      device->tile_align_h = 32;
+   case 640:
+      device->tile_align_w = 64;
+      device->tile_align_h = 16;
+      device->magic.RB_UNKNOWN_8E04_blit = 0x01000000;
+      device->magic.RB_CCU_CNTL_gmem     = 0x7c400004;
+      device->magic.PC_UNKNOWN_9805 = 0x1;
+      device->magic.SP_UNKNOWN_A0F8 = 0x1;
       break;
    default:
       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
@@ -354,6 +366,9 @@ static const struct debug_control tu_debug_options[] = {
    { "startup", TU_DEBUG_STARTUP },
    { "nir", TU_DEBUG_NIR },
    { "ir3", TU_DEBUG_IR3 },
+   { "nobin", TU_DEBUG_NOBIN },
+   { "sysmem", TU_DEBUG_SYSMEM },
+   { "forcebin", TU_DEBUG_FORCEBIN },
    { NULL, 0 }
 };
 
@@ -431,7 +446,6 @@ tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
       return vk_error(instance, result);
    }
 
-   _mesa_locale_init();
    glsl_type_singleton_init_or_ref();
 
    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
@@ -457,7 +471,6 @@ tu_DestroyInstance(VkInstance _instance,
    VG(VALGRIND_DESTROY_MEMPOOL(instance));
 
    glsl_type_singleton_decref();
-   _mesa_locale_fini();
 
    vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
 
@@ -569,7 +582,7 @@ tu_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
       .independentBlend = false,
       .geometryShader = false,
       .tessellationShader = false,
-      .sampleRateShading = false,
+      .sampleRateShading = true,
       .dualSrcBlend = false,
       .logicOp = false,
       .multiDrawIndirect = false,
@@ -582,11 +595,11 @@ tu_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
       .largePoints = false,
       .alphaToOne = false,
       .multiViewport = false,
-      .samplerAnisotropy = false,
-      .textureCompressionETC2 = false,
-      .textureCompressionASTC_LDR = false,
-      .textureCompressionBC = false,
-      .occlusionQueryPrecise = false,
+      .samplerAnisotropy = true,
+      .textureCompressionETC2 = true,
+      .textureCompressionASTC_LDR = true,
+      .textureCompressionBC = true,
+      .occlusionQueryPrecise = true,
       .pipelineStatisticsQuery = false,
       .vertexPipelineStoresAndAtomics = false,
       .fragmentStoresAndAtomics = false,
@@ -703,7 +716,8 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
                                VkPhysicalDeviceProperties *pProperties)
 {
    TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
-   VkSampleCountFlags sample_counts = 0xf;
+   VkSampleCountFlags sample_counts = VK_SAMPLE_COUNT_1_BIT |
+      VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
 
    /* make sure that the entire descriptor set is addressable with a signed
     * 32-bit int. So the sum of all limits scaled by descriptor size has to
@@ -726,7 +740,7 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .maxImageArrayLayers = (1 << 11),
       .maxTexelBufferElements = 128 * 1024 * 1024,
       .maxUniformBufferRange = UINT32_MAX,
-      .maxStorageBufferRange = UINT32_MAX,
+      .maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE,
       .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
       .maxMemoryAllocationCount = UINT32_MAX,
       .maxSamplerAllocationCount = 64 * 1024,
@@ -786,7 +800,7 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .viewportBoundsRange = { INT16_MIN, INT16_MAX },
       .viewportSubPixelBits = 8,
       .minMemoryMapAlignment = 4096, /* A page */
-      .minTexelBufferOffsetAlignment = 1,
+      .minTexelBufferOffsetAlignment = 64,
       .minUniformBufferOffsetAlignment = 4,
       .minStorageBufferOffsetAlignment = 4,
       .minTexelOffset = -32,
@@ -810,7 +824,7 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .sampledImageStencilSampleCounts = sample_counts,
       .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
       .maxSampleMaskWords = 1,
-      .timestampComputeAndGraphics = true,
+      .timestampComputeAndGraphics = false, /* FINISHME */
       .timestampPeriod = 1,
       .maxClipDistances = 8,
       .maxCullDistances = 8,
@@ -899,7 +913,7 @@ static const VkQueueFamilyProperties tu_queue_family_properties = {
    .queueFlags =
       VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
    .queueCount = 1,
-   .timestampValidBits = 64,
+   .timestampValidBits = 0, /* FINISHME */
    .minImageTransferGranularity = { 1, 1, 1 },
 };
 
@@ -1074,7 +1088,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
          8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
       if (!device->queues[qfi]) {
          result = VK_ERROR_OUT_OF_HOST_MEMORY;
-         goto fail;
+         goto fail_queues;
       }
 
       memset(device->queues[qfi], 0,
@@ -1086,13 +1100,27 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
          result = tu_queue_init(device, &device->queues[qfi][q], qfi, q,
                                 queue_create->flags);
          if (result != VK_SUCCESS)
-            goto fail;
+            goto fail_queues;
       }
    }
 
    device->compiler = ir3_compiler_create(NULL, physical_device->gpu_id);
    if (!device->compiler)
-      goto fail;
+      goto fail_queues;
+
+#define VSC_DATA_SIZE(pitch)  ((pitch) * 32 + 0x100)  /* extra size to store VSC_SIZE */
+#define VSC_DATA2_SIZE(pitch) ((pitch) * 32)
+
+   device->vsc_data_pitch = 0x440 * 4;
+   device->vsc_data2_pitch = 0x1040 * 4;
+
+   result = tu_bo_init_new(device, &device->vsc_data, VSC_DATA_SIZE(device->vsc_data_pitch));
+   if (result != VK_SUCCESS)
+      goto fail_vsc_data;
+
+   result = tu_bo_init_new(device, &device->vsc_data2, VSC_DATA2_SIZE(device->vsc_data2_pitch));
+   if (result != VK_SUCCESS)
+      goto fail_vsc_data2;
 
    VkPipelineCacheCreateInfo ci;
    ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
@@ -1104,14 +1132,23 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
    result =
       tu_CreatePipelineCache(tu_device_to_handle(device), &ci, NULL, &pc);
    if (result != VK_SUCCESS)
-      goto fail;
+      goto fail_pipeline_cache;
 
    device->mem_cache = tu_pipeline_cache_from_handle(pc);
 
    *pDevice = tu_device_to_handle(device);
    return VK_SUCCESS;
 
-fail:
+fail_pipeline_cache:
+   tu_bo_finish(device, &device->vsc_data2);
+
+fail_vsc_data2:
+   tu_bo_finish(device, &device->vsc_data);
+
+fail_vsc_data:
+   ralloc_free(device->compiler);
+
+fail_queues:
    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
       for (unsigned q = 0; q < device->queue_count[i]; q++)
          tu_queue_finish(&device->queues[i][q]);
@@ -1119,9 +1156,6 @@ fail:
          vk_free(&device->alloc, device->queues[i]);
    }
 
-   if (device->compiler)
-      ralloc_free(device->compiler);
-
    vk_free(&device->alloc, device);
    return result;
 }
@@ -1134,6 +1168,9 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    if (!device)
       return;
 
+   tu_bo_finish(device, &device->vsc_data);
+   tu_bo_finish(device, &device->vsc_data2);
+
    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
       for (unsigned q = 0; q < device->queue_count[i]; q++)
          tu_queue_finish(&device->queues[i][q]);
@@ -1563,7 +1600,7 @@ tu_GetImageMemoryRequirements(VkDevice _device,
    TU_FROM_HANDLE(tu_image, image, _image);
 
    pMemoryRequirements->memoryTypeBits = 1;
-   pMemoryRequirements->size = image->size;
+   pMemoryRequirements->size = image->layout.size;
    pMemoryRequirements->alignment = image->alignment;
 }
 
@@ -1761,6 +1798,8 @@ tu_DestroyEvent(VkDevice _device,
 
    if (!event)
       return;
+
+   tu_bo_finish(device, &event->bo);
    vk_free2(&device->alloc, pAllocator, event);
 }
 
@@ -1926,6 +1965,12 @@ tu6_tex_filter(VkFilter filter, unsigned aniso)
    }
 }
 
+static inline enum adreno_compare_func
+tu6_compare_func(VkCompareOp op) /* convert a Vulkan compare op into the adreno_compare_func enum type */
+{
+   return (enum adreno_compare_func) op; /* plain cast, no lookup table; NOTE(review): assumes both enums share the same numeric values -- confirm */
+}
+
 static void
 tu_init_sampler(struct tu_device *device,
                 struct tu_sampler *sampler,
@@ -1950,7 +1995,8 @@ tu_init_sampler(struct tu_device *device,
       COND(pCreateInfo->unnormalizedCoordinates, A6XX_TEX_SAMP_1_UNNORM_COORDS) |
       A6XX_TEX_SAMP_1_MIN_LOD(pCreateInfo->minLod) |
       A6XX_TEX_SAMP_1_MAX_LOD(pCreateInfo->maxLod) |
-      COND(pCreateInfo->compareEnable, A6XX_TEX_SAMP_1_COMPARE_FUNC(pCreateInfo->compareOp));
+      COND(pCreateInfo->compareEnable,
+           A6XX_TEX_SAMP_1_COMPARE_FUNC(tu6_compare_func(pCreateInfo->compareOp)));
    sampler->state[2] = 0;
    sampler->state[3] = 0;
 
@@ -1960,6 +2006,7 @@ tu_init_sampler(struct tu_device *device,
     */
 
    sampler->needs_border = needs_border;
+   sampler->border = pCreateInfo->borderColor;
 }
 
 VkResult