tu: ir3: Emit push constants directly
[mesa.git] / src / freedreno / vulkan / tu_device.c
index 645fcd31648ec8d2cfb6f174f91c39a1642b17c6..394c3ce18009d18f06352dfb703ed0c9a39e4d43 100644 (file)
@@ -252,14 +252,32 @@ tu_physical_device_init(struct tu_physical_device *device,
       goto fail;
    }
 
+   if (tu_drm_get_gmem_base(device, &device->gmem_base)) {
+      if (instance->debug_flags & TU_DEBUG_STARTUP)
+         tu_logi("Could not query the GMEM size");
+      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
+                         "could not get GMEM size");
+      goto fail;
+   }
+
    memset(device->name, 0, sizeof(device->name));
    sprintf(device->name, "FD%d", device->gpu_id);
 
    switch (device->gpu_id) {
+   case 618:
+      device->magic.RB_UNKNOWN_8E04_blit = 0x00100000;
+      device->ccu_offset_gmem = 0x7c000; /* 0x7e000 in some cases? */
+      device->ccu_offset_bypass = 0x10000;
+      device->magic.PC_UNKNOWN_9805 = 0x0;
+      device->magic.SP_UNKNOWN_A0F8 = 0x0;
+      break;
    case 630:
    case 640:
-      device->tile_align_w = 32;
-      device->tile_align_h = 32;
+      device->magic.RB_UNKNOWN_8E04_blit = 0x01000000;
+      device->ccu_offset_gmem = 0xf8000;
+      device->ccu_offset_bypass = 0x20000;
+      device->magic.PC_UNKNOWN_9805 = 0x1;
+      device->magic.SP_UNKNOWN_A0F8 = 0x1;
       break;
    default:
       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
@@ -318,7 +336,7 @@ tu_physical_device_finish(struct tu_physical_device *device)
       close(device->master_fd);
 }
 
-static void *
+static VKAPI_ATTR void *
 default_alloc_func(void *pUserData,
                    size_t size,
                    size_t align,
@@ -327,7 +345,7 @@ default_alloc_func(void *pUserData,
    return malloc(size);
 }
 
-static void *
+static VKAPI_ATTR void *
 default_realloc_func(void *pUserData,
                      void *pOriginal,
                      size_t size,
@@ -337,7 +355,7 @@ default_realloc_func(void *pUserData,
    return realloc(pOriginal, size);
 }
 
-static void
+static VKAPI_ATTR void
 default_free_func(void *pUserData, void *pMemory)
 {
    free(pMemory);
@@ -354,6 +372,9 @@ static const struct debug_control tu_debug_options[] = {
    { "startup", TU_DEBUG_STARTUP },
    { "nir", TU_DEBUG_NIR },
    { "ir3", TU_DEBUG_IR3 },
+   { "nobin", TU_DEBUG_NOBIN },
+   { "sysmem", TU_DEBUG_SYSMEM },
+   { "forcebin", TU_DEBUG_FORCEBIN },
    { NULL, 0 }
 };
 
@@ -562,17 +583,17 @@ tu_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
 
    *pFeatures = (VkPhysicalDeviceFeatures) {
       .robustBufferAccess = false,
-      .fullDrawIndexUint32 = false,
+      .fullDrawIndexUint32 = true,
       .imageCubeArray = false,
-      .independentBlend = false,
-      .geometryShader = false,
+      .independentBlend = true,
+      .geometryShader = true,
       .tessellationShader = false,
-      .sampleRateShading = false,
-      .dualSrcBlend = false,
-      .logicOp = false,
+      .sampleRateShading = true,
+      .dualSrcBlend = true,
+      .logicOp = true,
       .multiDrawIndirect = false,
       .drawIndirectFirstInstance = false,
-      .depthClamp = false,
+      .depthClamp = true,
       .depthBiasClamp = false,
       .fillModeNonSolid = false,
       .depthBounds = false,
@@ -584,7 +605,7 @@ tu_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
       .textureCompressionETC2 = true,
       .textureCompressionASTC_LDR = true,
       .textureCompressionBC = true,
-      .occlusionQueryPrecise = false,
+      .occlusionQueryPrecise = true,
       .pipelineStatisticsQuery = false,
       .vertexPipelineStoresAndAtomics = false,
       .fragmentStoresAndAtomics = false,
@@ -689,6 +710,13 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
          features->inheritedConditionalRendering = false;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
+         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
+            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *) ext;
+         features->transformFeedback = true;
+         features->geometryStreams = false;
+         break;
+      }
       default:
          break;
       }
@@ -701,8 +729,8 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
                                VkPhysicalDeviceProperties *pProperties)
 {
    TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
-   VkSampleCountFlags sample_counts = VK_SAMPLE_COUNT_1_BIT |
-      VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
+   VkSampleCountFlags sample_counts =
+      VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
 
    /* make sure that the entire descriptor set is addressable with a signed
     * 32-bit int. So the sum of all limits scaled by descriptor size has to
@@ -749,7 +777,7 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .maxDescriptorSetInputAttachments = max_descriptor_set_size,
       .maxVertexInputAttributes = 32,
       .maxVertexInputBindings = 32,
-      .maxVertexInputAttributeOffset = 2047,
+      .maxVertexInputAttributeOffset = 4095,
       .maxVertexInputBindingStride = 2048,
       .maxVertexOutputComponents = 128,
       .maxTessellationGenerationLevel = 64,
@@ -760,7 +788,7 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .maxTessellationControlTotalOutputComponents = 4096,
       .maxTessellationEvaluationInputComponents = 128,
       .maxTessellationEvaluationOutputComponents = 128,
-      .maxGeometryShaderInvocations = 127,
+      .maxGeometryShaderInvocations = 32,
       .maxGeometryInputComponents = 64,
       .maxGeometryOutputComponents = 128,
       .maxGeometryOutputVertices = 256,
@@ -773,7 +801,7 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
       .maxComputeWorkGroupInvocations = 2048,
       .maxComputeWorkGroupSize = { 2048, 2048, 2048 },
-      .subPixelPrecisionBits = 4 /* FIXME */,
+      .subPixelPrecisionBits = 8,
       .subTexelPrecisionBits = 4 /* FIXME */,
       .mipmapPrecisionBits = 4 /* FIXME */,
       .maxDrawIndexedIndexValue = UINT32_MAX,
@@ -785,7 +813,7 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .viewportBoundsRange = { INT16_MIN, INT16_MAX },
       .viewportSubPixelBits = 8,
       .minMemoryMapAlignment = 4096, /* A page */
-      .minTexelBufferOffsetAlignment = 1,
+      .minTexelBufferOffsetAlignment = 64,
       .minUniformBufferOffsetAlignment = 4,
       .minStorageBufferOffsetAlignment = 4,
       .minTexelOffset = -32,
@@ -809,8 +837,8 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .sampledImageStencilSampleCounts = sample_counts,
       .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
       .maxSampleMaskWords = 1,
-      .timestampComputeAndGraphics = false, /* FINISHME */
-      .timestampPeriod = 1,
+      .timestampComputeAndGraphics = true,
+      .timestampPeriod = 1000000000.0 / 19200000.0, /* CP_ALWAYS_ON_COUNTER is fixed 19.2MHz */
       .maxClipDistances = 8,
       .maxCullDistances = 8,
       .maxCombinedClipAndCullDistances = 8,
@@ -888,6 +916,23 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
          properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
+         VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
+            (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
+
+         properties->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
+         properties->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
+         properties->maxTransformFeedbackBufferSize = UINT32_MAX;
+         properties->maxTransformFeedbackStreamDataSize = 512;
+         properties->maxTransformFeedbackBufferDataSize = 512;
+         properties->maxTransformFeedbackBufferDataStride = 512;
+         /* TODO: enable xfb query */
+         properties->transformFeedbackQueries = false;
+         properties->transformFeedbackStreamsLinesTriangles = false;
+         properties->transformFeedbackRasterizationStreamSelect = false;
+         properties->transformFeedbackDraw = true;
+         break;
+      }
       default:
          break;
       }
@@ -898,7 +943,7 @@ static const VkQueueFamilyProperties tu_queue_family_properties = {
    .queueFlags =
       VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
    .queueCount = 1,
-   .timestampValidBits = 0, /* FINISHME */
+   .timestampValidBits = 48,
    .minImageTransferGranularity = { 1, 1, 1 },
 };
 
@@ -1012,6 +1057,61 @@ tu_get_device_extension_index(const char *name)
    return -1;
 }
 
+struct PACKED bcolor_entry { /* One border colour, stored once per encoding
+    * named by the fields below. tu_CreateDevice static-asserts that an entry
+    * is 128 bytes and copies border_color[] verbatim into the
+    * device->border_color BO; tu_init_sampler selects an entry with
+    * borderColor * sizeof(struct bcolor_entry). */
+   uint32_t fp32[4];
+   uint16_t ui16[4];
+   int16_t  si16[4];
+   uint16_t fp16[4];
+   uint16_t rgb565;
+   uint16_t rgb5a1;
+   uint16_t rgba4;
+   uint8_t __pad0[2];
+   uint8_t  ui8[4];
+   int8_t   si8[4];
+   uint32_t rgb10a2;
+   uint32_t z24; /* also s8? */
+   uint16_t srgb[4];      /* appears to duplicate fp16[], but clamped, used for srgb */
+   uint8_t  __pad1[56]; /* the fields above occupy 72 bytes; this pads the entry to 128 */
+} border_color[] = { /* Indexed directly by the VkBorderColor enum value.
+    * NOTE(review): the table has external linkage and is writable; if no
+    * other file refers to it, `static const` would be safer -- confirm. */
+   [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = {}, /* every field zero */
+   [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = {}, /* every field zero */
+   [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { /* only component 3 and the top bits
+       * of the packed fields are set (presumably the alpha bits -- confirm) */
+      .fp32[3] = 0x3f800000, /* bit pattern of 1.0 as an IEEE-754 single */
+      .ui16[3] = 0xffff,
+      .si16[3] = 0x7fff,
+      .fp16[3] = 0x3c00, /* bit pattern of 1.0 as an IEEE-754 half */
+      .rgb5a1 = 0x8000,
+      .rgba4 = 0xf000,
+      .ui8[3] = 0xff,
+      .si8[3] = 0x7f,
+      .rgb10a2 = 0xc0000000,
+      .srgb[3] = 0x3c00,
+   },
+   [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { /* NOTE(review): only fp32/fp16 are
+       * written, with the integer value 1 rather than a float bit pattern;
+       * the ui and si fields stay zero. Presumably integer borders are read
+       * from these two slots -- confirm. */
+      .fp32[3] = 1,
+      .fp16[3] = 1,
+   },
+   [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { /* same per-field maxima as opaque
+       * black, applied to all four components and every bit of the packed fields */
+      .fp32[0 ... 3] = 0x3f800000,
+      .ui16[0 ... 3] = 0xffff,
+      .si16[0 ... 3] = 0x7fff,
+      .fp16[0 ... 3] = 0x3c00,
+      .rgb565 = 0xffff,
+      .rgb5a1 = 0xffff,
+      .rgba4 = 0xffff,
+      .ui8[0 ... 3] = 0xff,
+      .si8[0 ... 3] = 0x7f,
+      .rgb10a2 = 0xffffffff,
+      .z24 = 0xffffff,
+      .srgb[0 ... 3] = 0x3c00,
+   },
+   [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { /* integer 1 in all four components
+       * of fp32/fp16 only; see the review note on the INT_OPAQUE_BLACK entry */
+      .fp32[0 ... 3] = 1,
+      .fp16[0 ... 3] = 1,
+   },
+};
+
+
 VkResult
 tu_CreateDevice(VkPhysicalDevice physicalDevice,
                 const VkDeviceCreateInfo *pCreateInfo,
@@ -1073,7 +1173,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
          8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
       if (!device->queues[qfi]) {
          result = VK_ERROR_OUT_OF_HOST_MEMORY;
-         goto fail;
+         goto fail_queues;
       }
 
       memset(device->queues[qfi], 0,
@@ -1085,13 +1185,38 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
          result = tu_queue_init(device, &device->queues[qfi][q], qfi, q,
                                 queue_create->flags);
          if (result != VK_SUCCESS)
-            goto fail;
+            goto fail_queues;
       }
    }
 
    device->compiler = ir3_compiler_create(NULL, physical_device->gpu_id);
    if (!device->compiler)
-      goto fail;
+      goto fail_queues;
+
+#define VSC_DATA_SIZE(pitch)  ((pitch) * 32 + 0x100)  /* extra size to store VSC_SIZE */
+#define VSC_DATA2_SIZE(pitch) ((pitch) * 32)
+
+   device->vsc_data_pitch = 0x440 * 4;
+   device->vsc_data2_pitch = 0x1040 * 4;
+
+   result = tu_bo_init_new(device, &device->vsc_data, VSC_DATA_SIZE(device->vsc_data_pitch));
+   if (result != VK_SUCCESS)
+      goto fail_vsc_data;
+
+   result = tu_bo_init_new(device, &device->vsc_data2, VSC_DATA2_SIZE(device->vsc_data2_pitch));
+   if (result != VK_SUCCESS)
+      goto fail_vsc_data2;
+
+   STATIC_ASSERT(sizeof(struct bcolor_entry) == 128);
+   result = tu_bo_init_new(device, &device->border_color, sizeof(border_color));
+   if (result != VK_SUCCESS)
+      goto fail_border_color;
+
+   result = tu_bo_map(device, &device->border_color);
+   if (result != VK_SUCCESS)
+      goto fail_border_color_map;
+
+   memcpy(device->border_color.map, border_color, sizeof(border_color));
 
    VkPipelineCacheCreateInfo ci;
    ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
@@ -1103,14 +1228,27 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
    result =
       tu_CreatePipelineCache(tu_device_to_handle(device), &ci, NULL, &pc);
    if (result != VK_SUCCESS)
-      goto fail;
+      goto fail_pipeline_cache;
 
    device->mem_cache = tu_pipeline_cache_from_handle(pc);
 
    *pDevice = tu_device_to_handle(device);
    return VK_SUCCESS;
 
-fail:
+fail_pipeline_cache:
+fail_border_color_map:
+   tu_bo_finish(device, &device->border_color);
+
+fail_border_color:
+   tu_bo_finish(device, &device->vsc_data2);
+
+fail_vsc_data2:
+   tu_bo_finish(device, &device->vsc_data);
+
+fail_vsc_data:
+   ralloc_free(device->compiler);
+
+fail_queues:
    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
       for (unsigned q = 0; q < device->queue_count[i]; q++)
          tu_queue_finish(&device->queues[i][q]);
@@ -1118,9 +1256,6 @@ fail:
          vk_free(&device->alloc, device->queues[i]);
    }
 
-   if (device->compiler)
-      ralloc_free(device->compiler);
-
    vk_free(&device->alloc, device);
    return result;
 }
@@ -1133,6 +1268,9 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    if (!device)
       return;
 
+   tu_bo_finish(device, &device->vsc_data);
+   tu_bo_finish(device, &device->vsc_data2);
+
    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
       for (unsigned q = 0; q < device->queue_count[i]; q++)
          tu_queue_finish(&device->queues[i][q]);
@@ -1307,6 +1445,44 @@ tu_DeviceWaitIdle(VkDevice _device)
    return VK_SUCCESS;
 }
 
+VkResult /* Placeholder entry point: calls tu_stub() and returns VK_SUCCESS
+    * without reading _device or pImportSemaphoreFdInfo, so nothing is
+    * imported by this function. */
+tu_ImportSemaphoreFdKHR(VkDevice _device,
+                        const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
+{
+   tu_stub();
+
+   return VK_SUCCESS; /* NOTE(review): success is reported although no work was done -- confirm callers tolerate this */
+}
+
+VkResult /* Placeholder entry point: calls tu_stub() and returns VK_SUCCESS
+    * without reading pGetFdInfo and without storing anything through pFd. */
+tu_GetSemaphoreFdKHR(VkDevice _device,
+                     const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
+                     int *pFd)
+{
+   tu_stub();
+
+   return VK_SUCCESS; /* NOTE(review): *pFd is left untouched, so a caller that trusts VK_SUCCESS reads whatever value it passed in -- confirm */
+}
+
+VkResult /* Placeholder entry point: calls tu_stub() and returns VK_SUCCESS
+    * without reading _device or pImportFenceFdInfo, so nothing is imported
+    * by this function. */
+tu_ImportFenceFdKHR(VkDevice _device,
+                    const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
+{
+   tu_stub();
+
+   return VK_SUCCESS; /* NOTE(review): success is reported although no work was done -- confirm callers tolerate this */
+}
+
+VkResult /* Placeholder entry point: calls tu_stub() and returns VK_SUCCESS
+    * without reading pGetFdInfo and without storing anything through pFd. */
+tu_GetFenceFdKHR(VkDevice _device,
+                 const VkFenceGetFdInfoKHR *pGetFdInfo,
+                 int *pFd)
+{
+   tu_stub();
+
+   return VK_SUCCESS; /* NOTE(review): *pFd is left untouched, so a caller that trusts VK_SUCCESS reads whatever value it passed in -- confirm */
+}
+
 VkResult
 tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                         uint32_t *pPropertyCount,
@@ -1562,8 +1738,8 @@ tu_GetImageMemoryRequirements(VkDevice _device,
    TU_FROM_HANDLE(tu_image, image, _image);
 
    pMemoryRequirements->memoryTypeBits = 1;
-   pMemoryRequirements->size = image->size;
-   pMemoryRequirements->alignment = image->alignment;
+   pMemoryRequirements->size = image->layout.size;
+   pMemoryRequirements->alignment = image->layout.base_align;
 }
 
 void
@@ -1760,6 +1936,8 @@ tu_DestroyEvent(VkDevice _device,
 
    if (!event)
       return;
+
+   tu_bo_finish(device, &event->bo);
    vk_free2(&device->alloc, pAllocator, event);
 }
 
@@ -1889,7 +2067,7 @@ tu_DestroyFramebuffer(VkDevice _device,
 }
 
 static enum a6xx_tex_clamp
-tu6_tex_wrap(VkSamplerAddressMode address_mode, bool *needs_border)
+tu6_tex_wrap(VkSamplerAddressMode address_mode)
 {
    switch (address_mode) {
    case VK_SAMPLER_ADDRESS_MODE_REPEAT:
@@ -1899,7 +2077,6 @@ tu6_tex_wrap(VkSamplerAddressMode address_mode, bool *needs_border)
    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
       return A6XX_TEX_CLAMP_TO_EDGE;
    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
-      *needs_border = true;
       return A6XX_TEX_CLAMP_TO_BORDER;
    case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
       /* only works for PoT.. need to emulate otherwise! */
@@ -1925,6 +2102,12 @@ tu6_tex_filter(VkFilter filter, unsigned aniso)
    }
 }
 
+static inline enum adreno_compare_func /* Converts a Vulkan compare op to the
+    * hardware compare-function enum with a plain cast, i.e. the numeric
+    * value is passed through unchanged.
+    * NOTE(review): this assumes enum adreno_compare_func is numbered
+    * identically to VkCompareOp -- confirm against the register headers. */
+tu6_compare_func(VkCompareOp op)
+{
+   return (enum adreno_compare_func) op;
+}
+
 static void
 tu_init_sampler(struct tu_device *device,
                 struct tu_sampler *sampler,
@@ -1933,33 +2116,35 @@ tu_init_sampler(struct tu_device *device,
    unsigned aniso = pCreateInfo->anisotropyEnable ?
       util_last_bit(MIN2((uint32_t)pCreateInfo->maxAnisotropy >> 1, 8)) : 0;
    bool miplinear = (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR);
-   bool needs_border = false;
 
-   sampler->state[0] =
+   sampler->descriptor[0] =
       COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
       A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(pCreateInfo->magFilter, aniso)) |
       A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(pCreateInfo->minFilter, aniso)) |
       A6XX_TEX_SAMP_0_ANISO(aniso) |
-      A6XX_TEX_SAMP_0_WRAP_S(tu6_tex_wrap(pCreateInfo->addressModeU, &needs_border)) |
-      A6XX_TEX_SAMP_0_WRAP_T(tu6_tex_wrap(pCreateInfo->addressModeV, &needs_border)) |
-      A6XX_TEX_SAMP_0_WRAP_R(tu6_tex_wrap(pCreateInfo->addressModeW, &needs_border)) |
+      A6XX_TEX_SAMP_0_WRAP_S(tu6_tex_wrap(pCreateInfo->addressModeU)) |
+      A6XX_TEX_SAMP_0_WRAP_T(tu6_tex_wrap(pCreateInfo->addressModeV)) |
+      A6XX_TEX_SAMP_0_WRAP_R(tu6_tex_wrap(pCreateInfo->addressModeW)) |
       A6XX_TEX_SAMP_0_LOD_BIAS(pCreateInfo->mipLodBias);
-   sampler->state[1] =
+   sampler->descriptor[1] =
       /* COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | */
       COND(pCreateInfo->unnormalizedCoordinates, A6XX_TEX_SAMP_1_UNNORM_COORDS) |
       A6XX_TEX_SAMP_1_MIN_LOD(pCreateInfo->minLod) |
       A6XX_TEX_SAMP_1_MAX_LOD(pCreateInfo->maxLod) |
-      COND(pCreateInfo->compareEnable, A6XX_TEX_SAMP_1_COMPARE_FUNC(pCreateInfo->compareOp));
-   sampler->state[2] = 0;
-   sampler->state[3] = 0;
+      COND(pCreateInfo->compareEnable,
+           A6XX_TEX_SAMP_1_COMPARE_FUNC(tu6_compare_func(pCreateInfo->compareOp)));
+   /* This is an offset into the border_color BO, which we fill with all the
+    * possible Vulkan border colors in the correct order, so we can just use
+    * the Vulkan enum with no translation necessary.
+    */
+   sampler->descriptor[2] =
+      A6XX_TEX_SAMP_2_BCOLOR_OFFSET((unsigned) pCreateInfo->borderColor *
+                                    sizeof(struct bcolor_entry));
+   sampler->descriptor[3] = 0;
 
    /* TODO:
     * A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR disables mipmapping, but vk has no NONE mipfilter?
-    * border color
     */
-
-   sampler->needs_border = needs_border;
-   sampler->border = pCreateInfo->borderColor;
 }
 
 VkResult